/*	$NetBSD: vm.c,v 1.33 2008/07/20 16:18:13 pooka Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.  Contents:
 *  + UBC
 *  + anon objects & pager
 *  + vnode objects & pager
 *  + misc support routines
 *  + kmem
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */
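/*
 * Illustration (not from the original source): given that convention,
 * the backing storage of a page is reached with a cast, e.g.
 *
 *	memset((void *)pg->uanon, 0, PAGE_SIZE);
 *
 * as the pager routines below do throughout.
 */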

#include <sys/param.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/kmem.h>

#include <uvm/uvm.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include <machine/pmap.h>

#include "rump_private.h"
#include "rumpuser.h"
/* dumdidumdum */
#define len2npages(off, len)						\
	(((((len) + PAGE_MASK) & ~(PAGE_MASK)) >> PAGE_SHIFT)		\
	    + ((((off) & PAGE_MASK) + ((len) & PAGE_MASK)) > PAGE_SIZE))
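/*
 * Worked example (illustrative, assuming PAGE_SIZE == 4096): for
 * off = 0x1f00 and len = 0x300, the rounded-up length covers one page,
 * but the in-page offsets sum to 0x1200 > PAGE_SIZE, so the transfer
 * straddles a page boundary and len2npages() yields 2.
 */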

static int vn_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int vn_put(struct uvm_object *, voff_t, voff_t, int);
static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

const struct uvm_pagerops uvm_vnodeops = {
	.pgo_get = vn_get,
	.pgo_put = vn_put,
};
const struct uvm_pagerops aobj_pager = {
	.pgo_get = ao_get,
	.pgo_put = ao_put,
};

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;
const struct rb_tree_ops uvm_page_tree_ops;

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
	pg->offset = off;
	pg->uobject = uobj;

	pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);

	return pg;
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
	kmem_free((void *)pg->uanon, PAGE_SIZE);
	kmem_free(pg, sizeof(*pg));
}

struct rumpva {
	vaddr_t addr;
	struct vm_page *pg;

	LIST_ENTRY(rumpva) entries;
};
static LIST_HEAD(, rumpva) rvahead = LIST_HEAD_INITIALIZER(rvahead);
static kmutex_t rvamtx;

void
rumpvm_enterva(vaddr_t addr, struct vm_page *pg)
{
	struct rumpva *rva;

	rva = kmem_alloc(sizeof(struct rumpva), KM_SLEEP);
	rva->addr = addr;
	rva->pg = pg;
	mutex_enter(&rvamtx);
	LIST_INSERT_HEAD(&rvahead, rva, entries);
	mutex_exit(&rvamtx);
}

void
rumpvm_flushva(void)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	while ((rva = LIST_FIRST(&rvahead)) != NULL) {
		LIST_REMOVE(rva, entries);
		kmem_free(rva, sizeof(*rva));
	}
	mutex_exit(&rvamtx);
}
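
/*
 * Illustrative pairing (not in the original file): pager code records
 * the address of a page's backing memory with rumpvm_enterva() and can
 * later map it back to the vm_page with uvm_pageratop(), defined below.
 * Compiled out; for exposition only.
 */
#if 0
static void
example_va_mapping(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = rumpvm_makepage(uobj, off);
	rumpvm_enterva((vaddr_t)pg->uanon, pg);
	KASSERT(uvm_pageratop((vaddr_t)pg->uanon) == pg);
	rumpvm_flushva();
}
#endif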

/*
 * vnode pager
 */

static int
vn_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_GETPAGES(vp, off, pgs, npages, centeridx, access_type,
	    advice, flags);
}

static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_PUTPAGES(vp, offlo, offhi, flags);
}

/*
 * Anon object stuff
 */

static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	/* we only free all pages for now */
	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
		uvm_pagefree(pg);
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

struct uvm_object *
uao_create(vsize_t size, int flags)
{
	struct uvm_object *uobj;

	uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
	uobj->pgops = &aobj_pager;
	TAILQ_INIT(&uobj->memq);
	mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

	return uobj;
}

void
uao_detach(struct uvm_object *uobj)
{

	mutex_enter(&uobj->vmobjlock);
	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);	/* drops the lock */
	mutex_destroy(&uobj->vmobjlock);
	kmem_free(uobj, sizeof(*uobj));
}
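
/*
 * Hypothetical lifecycle sketch (not part of the original file): create
 * an anon object, touch one page through the pager, and detach.
 * Compiled out; for exposition only.
 */
#if 0
static void
example_aobj_roundtrip(void)
{
	struct uvm_object *uobj;
	struct vm_page *pgs[1];
	int npages = 1;

	uobj = uao_create(PAGE_SIZE, 0);

	mutex_enter(&uobj->vmobjlock);
	/* ao_get creates a zero-filled busy page at offset 0, drops lock */
	uobj->pgops->pgo_get(uobj, 0, pgs, &npages, 0, 0, 0, 0);
	memset((void *)pgs[0]->uanon, 0xa5, PAGE_SIZE);
	pgs[0]->flags &= ~PG_CLEAN;
	uvm_page_unbusy(pgs, npages);

	uao_detach(uobj);	/* frees all remaining pages */
}
#endif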

/*
 * UBC
 */

struct ubc_window {
	struct uvm_object *uwin_obj;
	voff_t uwin_off;
	uint8_t *uwin_mem;
	size_t uwin_mapsize;

	LIST_ENTRY(ubc_window) uwin_entries;
};

static LIST_HEAD(, ubc_window) uwinlst = LIST_HEAD_INITIALIZER(uwinlst);
static kmutex_t uwinmtx;

int
rump_ubc_magic_uiomove(void *va, size_t n, struct uio *uio, int *rvp,
	struct ubc_window *uwinp)
{
	struct vm_page **pgs;
	int npages = len2npages(uio->uio_offset, n);
	size_t allocsize;
	int i, rv;

	if (uwinp == NULL) {
		mutex_enter(&uwinmtx);
		LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
			if ((uint8_t *)va >= uwinp->uwin_mem
			    && (uint8_t *)va
			      < (uwinp->uwin_mem + uwinp->uwin_mapsize))
				break;
		mutex_exit(&uwinmtx);
		if (uwinp == NULL) {
			KASSERT(rvp != NULL);
			return 0;
		}
	}

	allocsize = npages * sizeof(*pgs);
	pgs = kmem_zalloc(allocsize, KM_SLEEP);
	mutex_enter(&uwinp->uwin_obj->vmobjlock);
	rv = uwinp->uwin_obj->pgops->pgo_get(uwinp->uwin_obj,
	    uwinp->uwin_off + ((uint8_t *)va - uwinp->uwin_mem),
	    pgs, &npages, 0, 0, 0, 0);
	if (rv)
		goto out;

	for (i = 0; i < npages; i++) {
		size_t xfersize;
		off_t pageoff;

		pageoff = uio->uio_offset & PAGE_MASK;
		xfersize = MIN(MIN(n, PAGE_SIZE), PAGE_SIZE-pageoff);
		uiomove((uint8_t *)pgs[i]->uanon + pageoff, xfersize, uio);
		if (uio->uio_rw == UIO_WRITE)
			pgs[i]->flags &= ~PG_CLEAN;
		n -= xfersize;
	}
	uvm_page_unbusy(pgs, npages);

 out:
	kmem_free(pgs, allocsize);
	if (rvp)
		*rvp = rv;
	return 1;
}

static struct ubc_window *
uwin_alloc(struct uvm_object *uobj, voff_t off, vsize_t len)
{
	struct ubc_window *uwinp; /* pronounced: you wimp! */

	uwinp = kmem_alloc(sizeof(struct ubc_window), KM_SLEEP);
	uwinp->uwin_obj = uobj;
	uwinp->uwin_off = off;
	uwinp->uwin_mapsize = len;
	uwinp->uwin_mem = kmem_alloc(len, KM_SLEEP);

	return uwinp;
}

static void
uwin_free(struct ubc_window *uwinp)
{

	kmem_free(uwinp->uwin_mem, uwinp->uwin_mapsize);
	kmem_free(uwinp, sizeof(struct ubc_window));
}

void *
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
	int flags)
{
	struct ubc_window *uwinp;

	uwinp = uwin_alloc(uobj, offset, *lenp);
	mutex_enter(&uwinmtx);
	LIST_INSERT_HEAD(&uwinlst, uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	DPRINTF(("UBC_ALLOC offset 0x%llx, uwin %p, mem %p\n",
	    (unsigned long long)offset, uwinp, uwinp->uwin_mem));

	return uwinp->uwin_mem;
}

void
ubc_release(void *va, int flags)
{
	struct ubc_window *uwinp;

	mutex_enter(&uwinmtx);
	LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
		if ((uint8_t *)va >= uwinp->uwin_mem
		    && (uint8_t *)va < (uwinp->uwin_mem + uwinp->uwin_mapsize))
			break;
	if (uwinp == NULL) {
		mutex_exit(&uwinmtx);
		panic("%s: releasing invalid window at %p", __func__, va);
	}
	/* unlink while still holding the list lock */
	LIST_REMOVE(uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	uwin_free(uwinp);
}

int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
	int advice, int flags)
{
	struct ubc_window *uwinp;
	vsize_t len;

	while (todo > 0) {
		len = todo;

		uwinp = uwin_alloc(uobj, uio->uio_offset, len);
		rump_ubc_magic_uiomove(uwinp->uwin_mem, len, uio, NULL, uwinp);
		uwin_free(uwinp);

		todo -= len;
	}
	return 0;
}
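
/*
 * Hypothetical caller's view (not part of the original file): a file
 * system read/write path maps a window over the object and copies
 * through it.  In rump, the uiomove() on the window is diverted to
 * rump_ubc_magic_uiomove() above, which pages the data in through the
 * object's pgo_get and performs the real copy.  Compiled out; names
 * are illustrative.
 */
#if 0
static int
example_ubc_io(struct vnode *vp, struct uio *uio, vsize_t bytes)
{
	void *win;
	vsize_t len = bytes;
	int error;

	win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &len, UVM_ADV_NORMAL, 0);
	error = uiomove(win, len, uio);
	ubc_release(win, 0);

	return error;
}
#endif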


/*
 * Misc routines
 */

void
rumpvm_init(void)
{

	uvmexp.free = 1024*1024; /* XXX */
	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */

	mutex_init(&rvamtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uwinmtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);
}

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
		if (pg->offset == off) {
			return pg;
		}
	}

	return NULL;
}

struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	LIST_FOREACH(rva, &rvahead, entries)
		if (rva->addr == va)
			break;
	mutex_exit(&rvamtx);

	if (rva == NULL)
		panic("%s: va %llu", __func__, (unsigned long long)va);

	return rva->pg;
}

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

void
uvm_aio_biodone1(struct buf *bp)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_aio_biodone(struct buf *bp)
{

	uvm_aio_aiodone(bp);
}

void
uvm_aio_aiodone(struct buf *bp)
{

	if (((bp->b_flags | bp->b_cflags) & (B_READ | BC_NOCACHE)) == 0 &&
	    bioopsp)
		bioopsp->io_pageiodone(bp);
}

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_size = vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page **pgs;
	int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
	int rv, npages, i;

	pgs = kmem_zalloc(maxpages * sizeof(*pgs), KM_SLEEP);
	while (len) {
		npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
		memset(pgs, 0, npages * sizeof(struct vm_page *));
		mutex_enter(&uobj->vmobjlock);
		rv = uobj->pgops->pgo_get(uobj, off, pgs, &npages, 0, 0, 0, 0);
		KASSERT(rv == 0);
		KASSERT(npages > 0);

		for (i = 0; i < npages; i++) {
			uint8_t *start;
			size_t chunkoff, chunklen;

			chunkoff = off & PAGE_MASK;
			chunklen = MIN(PAGE_SIZE - chunkoff, len);
			start = (uint8_t *)pgs[i]->uanon + chunkoff;

			memset(start, 0, chunklen);
			/* page contents changed: mark it dirty */
			pgs[i]->flags &= ~PG_CLEAN;

			off += chunklen;
			len -= chunklen;
		}
		uvm_page_unbusy(pgs, npages);
	}
	kmem_free(pgs, maxpages * sizeof(*pgs));
}

struct uvm_ractx *
uvm_ra_allocctx(void)
{

	return NULL;
}

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

	return;
}

bool
uvn_clean_p(struct uvm_object *uobj)
{
	struct vnode *vp = (void *)uobj;

	return (vp->v_iflag & VI_ONWORKLST) == 0;
}

/*
 * Kmem
 */

void *
kmem_alloc(size_t size, km_flag_t kmflag)
{

	return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
}

void *
kmem_zalloc(size_t size, km_flag_t kmflag)
{
	void *rv;

	rv = kmem_alloc(size, kmflag);
	if (rv)
		memset(rv, 0, size);

	return rv;
}

void
kmem_free(void *p, size_t size)
{

	rumpuser_free(p);
}

/*
 * UVM km
 */

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv;

	rv = rumpuser_malloc(size, flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT));
	if (rv && flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	rumpuser_free((void *)vaddr);
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

	/* dummy non-NULL cookie; callers never dereference it here */
	return (struct vm_map *)417416;
}

void
uvm_pageout_start(int npages)
{

	uvmexp.paging += npages;
}

void
uvm_pageout_done(int npages)
{

	uvmexp.paging -= npages;

	/*
	 * wake up either the pagedaemon or the LWPs waiting for it.
	 */

	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
	}
}