/*	$NetBSD: vm.c,v 1.32.2.2 2008/07/31 04:51:05 simonb Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.  Contents:
 * + UBC
 * + anon objects & pager
 * + vnode objects & pager
 * + misc support routines
 * + kmem
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */
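
/*
 * Illustrative sketch (not code from this file): since pg->uanon
 * holds the page's backing storage, touching a page's memory is a
 * plain cast and pointer arithmetic, e.g.
 *
 *	memset((uint8_t *)pg->uanon + pageoff, 0, chunklen);
 *
 * which is exactly how the pagers and uvm_vnp_zerorange() below
 * reach the data.
 */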

#include <sys/param.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/kmem.h>

#include <machine/pmap.h>

#include <rump/rumpuser.h>

#include <uvm/uvm.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include "rump_private.h"

/*
 * Number of pages touched by [off, off+len).  dumdidumdum.
 * (The previous straddle test undercounted by one page when len was
 * a multiple of PAGE_SIZE but off was not page-aligned.)
 */
#define len2npages(off, len)						\
	((((off) & PAGE_MASK) + (len) + PAGE_MASK) >> PAGE_SHIFT)
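
/*
 * Worked example (a sketch, assuming 4kB pages): off = 0x1800,
 * len = 0x1000 covers [0x1800, 0x2800), i.e. parts of two pages,
 * and ((0x800 + 0x1000) + 0xfff) >> 12 == 2 as expected.
 */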

static int vn_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int vn_put(struct uvm_object *, voff_t, voff_t, int);
static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

const struct uvm_pagerops uvm_vnodeops = {
	.pgo_get = vn_get,
	.pgo_put = vn_put,
};
const struct uvm_pagerops aobj_pager = {
	.pgo_get = ao_get,
	.pgo_put = ao_put,
};

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;
const struct rb_tree_ops uvm_page_tree_ops;

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
	pg->offset = off;
	pg->uobject = uobj;

	pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);

	return pg;
}
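
/*
 * Illustrative call sequence (a sketch, not lifted from a caller):
 *
 *	mutex_enter(&uobj->vmobjlock);
 *	pg = rumpvm_makepage(uobj, off);
 *	mutex_exit(&uobj->vmobjlock);
 *
 * The new page comes back PG_BUSY and must eventually be released
 * with uvm_page_unbusy() or destroyed with uvm_pagefree().
 */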

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
	kmem_free((void *)pg->uanon, PAGE_SIZE);
	kmem_free(pg, sizeof(*pg));
}

struct rumpva {
	vaddr_t addr;
	struct vm_page *pg;

	LIST_ENTRY(rumpva) entries;
};
static LIST_HEAD(, rumpva) rvahead = LIST_HEAD_INITIALIZER(rvahead);
static kmutex_t rvamtx;
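
/*
 * The rumpva list maps pager-window virtual addresses back to their
 * vm_pages for uvm_pageratop().  A minimal usage sketch (assumed,
 * not from this file): a pager enters translations before starting
 * I/O and flushes the whole list once the I/O has completed:
 *
 *	rumpvm_enterva(va, pg);
 *	...
 *	pg = uvm_pageratop(va);
 *	...
 *	rumpvm_flushva();
 */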

void
rumpvm_enterva(vaddr_t addr, struct vm_page *pg)
{
	struct rumpva *rva;

	rva = kmem_alloc(sizeof(struct rumpva), KM_SLEEP);
	rva->addr = addr;
	rva->pg = pg;
	mutex_enter(&rvamtx);
	LIST_INSERT_HEAD(&rvahead, rva, entries);
	mutex_exit(&rvamtx);
}

void
rumpvm_flushva(void)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	while ((rva = LIST_FIRST(&rvahead)) != NULL) {
		LIST_REMOVE(rva, entries);
		kmem_free(rva, sizeof(*rva));
	}
	mutex_exit(&rvamtx);
}

/*
 * vnode pager
 */

static int
vn_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_GETPAGES(vp, off, pgs, npages, centeridx, access_type,
	    advice, flags);
}

static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_PUTPAGES(vp, offlo, offhi, flags);
}

/*
 * Anon object stuff
 */

static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	/* we only free all pages for now */
	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
		uvm_pagefree(pg);
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

struct uvm_object *
uao_create(vsize_t size, int flags)
{
	struct uvm_object *uobj;

	uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
	uobj->pgops = &aobj_pager;
	TAILQ_INIT(&uobj->memq);
	mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

	return uobj;
}

void
uao_detach(struct uvm_object *uobj)
{

	mutex_enter(&uobj->vmobjlock);
	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
	mutex_destroy(&uobj->vmobjlock);
	kmem_free(uobj, sizeof(*uobj));
}

/*
 * UBC
 */

struct ubc_window {
	struct uvm_object *uwin_obj;
	voff_t uwin_off;
	uint8_t *uwin_mem;
	size_t uwin_mapsize;

	LIST_ENTRY(ubc_window) uwin_entries;
};

static LIST_HEAD(, ubc_window) uwinlst = LIST_HEAD_INITIALIZER(uwinlst);
static kmutex_t uwinmtx;
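
/*
 * Each UBC window pairs a malloc'd buffer with an (object, offset)
 * range; rump_ubc_magic_uiomove() recognizes accesses to window
 * memory by address and redirects them to the object's pager.  A
 * sketch of the intended cycle (names as used below):
 *
 *	win = ubc_alloc(uobj, off, &len, advice, flags);
 *	error = uiomove(win, len, uio);
 *	ubc_release(win, flags);
 */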

int
rump_ubc_magic_uiomove(void *va, size_t n, struct uio *uio, int *rvp,
	struct ubc_window *uwinp)
{
	struct vm_page **pgs;
	int npages = len2npages(uio->uio_offset, n);
	size_t allocsize;
	int i, rv;

	if (uwinp == NULL) {
		mutex_enter(&uwinmtx);
		LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
			if ((uint8_t *)va >= uwinp->uwin_mem
			    && (uint8_t *)va
			      < (uwinp->uwin_mem + uwinp->uwin_mapsize))
				break;
		mutex_exit(&uwinmtx);
		if (uwinp == NULL) {
			KASSERT(rvp != NULL);
			return 0;
		}
	}

	allocsize = npages * sizeof(*pgs);
	pgs = kmem_zalloc(allocsize, KM_SLEEP);
	mutex_enter(&uwinp->uwin_obj->vmobjlock);
	rv = uwinp->uwin_obj->pgops->pgo_get(uwinp->uwin_obj,
	    uwinp->uwin_off + ((uint8_t *)va - uwinp->uwin_mem),
	    pgs, &npages, 0, 0, 0, 0);
	if (rv)
		goto out;

	for (i = 0; i < npages; i++) {
		size_t xfersize;
		off_t pageoff;

		pageoff = uio->uio_offset & PAGE_MASK;
		xfersize = MIN(MIN(n, PAGE_SIZE), PAGE_SIZE-pageoff);
		uiomove((uint8_t *)pgs[i]->uanon + pageoff, xfersize, uio);
		if (uio->uio_rw == UIO_WRITE)
			pgs[i]->flags &= ~PG_CLEAN;
		n -= xfersize;
	}
	uvm_page_unbusy(pgs, npages);

 out:
	kmem_free(pgs, allocsize);
	if (rvp)
		*rvp = rv;
	return 1;
}
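
/*
 * Worked example of the copy loop above (a sketch, 4kB pages):
 * uio_offset = 0x1800, n = 0x2000 makes len2npages() == 3.
 * Iteration 1 copies 0x800 bytes (up to the page boundary),
 * iteration 2 a full 0x1000, and iteration 3 the remaining 0x800;
 * uiomove() advances uio_offset, so pageoff is recomputed each round.
 */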

static struct ubc_window *
uwin_alloc(struct uvm_object *uobj, voff_t off, vsize_t len)
{
	struct ubc_window *uwinp; /* pronounced: you wimp! */

	uwinp = kmem_alloc(sizeof(struct ubc_window), KM_SLEEP);
	uwinp->uwin_obj = uobj;
	uwinp->uwin_off = off;
	uwinp->uwin_mapsize = len;
	uwinp->uwin_mem = kmem_alloc(len, KM_SLEEP);

	return uwinp;
}

static void
uwin_free(struct ubc_window *uwinp)
{

	kmem_free(uwinp->uwin_mem, uwinp->uwin_mapsize);
	kmem_free(uwinp, sizeof(struct ubc_window));
}

void *
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
	int flags)
{
	struct ubc_window *uwinp;

	uwinp = uwin_alloc(uobj, offset, *lenp);
	mutex_enter(&uwinmtx);
	LIST_INSERT_HEAD(&uwinlst, uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	DPRINTF(("UBC_ALLOC offset 0x%llx, uwin %p, mem %p\n",
	    (unsigned long long)offset, uwinp, uwinp->uwin_mem));

	return uwinp->uwin_mem;
}

void
ubc_release(void *va, int flags)
{
	struct ubc_window *uwinp;

	mutex_enter(&uwinmtx);
	LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
		if ((uint8_t *)va >= uwinp->uwin_mem
		    && (uint8_t *)va < (uwinp->uwin_mem + uwinp->uwin_mapsize))
			break;
	if (uwinp == NULL) {
		mutex_exit(&uwinmtx);
		panic("%s: releasing invalid window at %p", __func__, va);
	}
	/* unlink while still holding uwinmtx to avoid racing a lookup */
	LIST_REMOVE(uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	uwin_free(uwinp);
}

int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
	int advice, int flags)
{
	struct ubc_window *uwinp;
	vsize_t len;
	int rv;

	while (todo > 0) {
		len = todo;

		uwinp = uwin_alloc(uobj, uio->uio_offset, len);
		rump_ubc_magic_uiomove(uwinp->uwin_mem, len, uio, &rv, uwinp);
		uwin_free(uwinp);
		if (rv)
			return rv;

		todo -= len;
	}
	return 0;
}
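
/*
 * Usage sketch (assumed caller, e.g. a file system read path):
 *
 *	error = ubc_uiomove(&vp->v_uobj, uio, bytes, UVM_ADV_NORMAL,
 *	    UBC_READ);
 *
 * which stands in for an explicit ubc_alloc()/uiomove()/ubc_release()
 * cycle by building a transient window around the request.
 */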

/*
 * Misc routines
 */

void
rumpvm_init(void)
{

	uvmexp.free = 1024*1024; /* XXX */
	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */

	mutex_init(&rvamtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uwinmtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);
}

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
		if (pg->offset == off) {
			return pg;
		}
	}

	return NULL;
}

struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	LIST_FOREACH(rva, &rvahead, entries)
		if (rva->addr == va)
			break;
	mutex_exit(&rvamtx);

	if (rva == NULL)
		panic("%s: va %llu", __func__, (unsigned long long)va);

	return rva->pg;
}

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

void
uvm_aio_biodone1(struct buf *bp)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_aio_biodone(struct buf *bp)
{

	uvm_aio_aiodone(bp);
}

void
uvm_aio_aiodone(struct buf *bp)
{

	if (((bp->b_flags | bp->b_cflags) & (B_READ | BC_NOCACHE)) == 0
	    && bioopsp)
		bioopsp->io_pageiodone(bp);
}

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_size = vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

	mutex_enter(&vp->v_interlock);
	vp->v_writesize = newsize;
	mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page **pgs;
	int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
	int rv, npages, i;

	pgs = kmem_zalloc(maxpages * sizeof(*pgs), KM_SLEEP);
	while (len) {
		npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
		memset(pgs, 0, npages * sizeof(struct vm_page *));
		mutex_enter(&uobj->vmobjlock);
		rv = uobj->pgops->pgo_get(uobj, off, pgs, &npages, 0, 0, 0, 0);
		KASSERT(npages > 0);

		for (i = 0; i < npages; i++) {
			uint8_t *start;
			size_t chunkoff, chunklen;

			chunkoff = off & PAGE_MASK;
			chunklen = MIN(PAGE_SIZE - chunkoff, len);
			start = (uint8_t *)pgs[i]->uanon + chunkoff;

			memset(start, 0, chunklen);
			pgs[i]->flags &= ~PG_CLEAN;	/* now dirty */

			off += chunklen;
			len -= chunklen;
		}
		uvm_page_unbusy(pgs, npages);
	}
	kmem_free(pgs, maxpages * sizeof(*pgs));

	return;
}

struct uvm_ractx *
uvm_ra_allocctx(void)
{

	return NULL;
}

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

	return;
}

bool
uvn_clean_p(struct uvm_object *uobj)
{
	struct vnode *vp = (void *)uobj;

	return (vp->v_iflag & VI_ONWORKLST) == 0;
}

/*
 * Kmem
 */

void *
kmem_alloc(size_t size, km_flag_t kmflag)
{

	return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
}

void *
kmem_zalloc(size_t size, km_flag_t kmflag)
{
	void *rv;

	rv = kmem_alloc(size, kmflag);
	if (rv)
		memset(rv, 0, size);

	return rv;
}

void
kmem_free(void *p, size_t size)
{

	rumpuser_free(p);
}
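
/*
 * These shims route kernel kmem(9) allocations to host memory via
 * rumpuser.  The expected contract is the standard kmem(9) one (a
 * sketch, not from this file): KM_SLEEP requests never fail, and the
 * size passed to kmem_free() must match the one used to allocate:
 *
 *	p = kmem_zalloc(sizeof(*p), KM_SLEEP);
 *	...
 *	kmem_free(p, sizeof(*p));
 */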

/*
 * UVM km
 */

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv;

	rv = rumpuser_malloc(size, flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT));
	if (rv && flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	rumpuser_free((void *)vaddr);
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

	/* XXX: a non-NULL poison value, not a real map */
	return (struct vm_map *)417416;
}

void
uvm_pageout_start(int npages)
{

	uvmexp.paging += npages;
}

void
uvm_pageout_done(int npages)
{

	uvmexp.paging -= npages;

	/*
	 * Wake up either the pagedaemon or the LWPs waiting for it
	 * to free up memory.
	 */

	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
	}
}