/*	$NetBSD: vm.c,v 1.26 2008/01/02 15:44:04 pooka Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.  Contents:
 *  + UBC
 *  + anon objects & pager
 *  + vnode objects & pager
 *  + misc support routines
 *  + kmem
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */

#include <sys/param.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/kmem.h>

#include <uvm/uvm.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include <machine/pmap.h>

#include "rump_private.h"
#include "rumpuser.h"

/* dumdidumdum */
#define len2npages(off, len)						\
        (((((len) + PAGE_MASK) & ~(PAGE_MASK)) >> PAGE_SHIFT)		\
            + ((((off) & PAGE_MASK) + ((len) & PAGE_MASK)) > PAGE_SIZE))
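
/*
 * len2npages() computes how many pages a transfer of "len" bytes
 * starting at offset "off" touches: the first term rounds len up to
 * whole pages, the second adds one more page when the unaligned head
 * and tail together spill over an extra page boundary.  For example,
 * with 4kB pages, off = 4000 and len = 200 touch two pages even
 * though len rounds up to only one page.
 */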

static int vn_get(struct uvm_object *, voff_t, struct vm_page **,
        int *, int, vm_prot_t, int, int);
static int vn_put(struct uvm_object *, voff_t, voff_t, int);
static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
        int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

const struct uvm_pagerops uvm_vnodeops = {
        .pgo_get = vn_get,
        .pgo_put = vn_put,
};
const struct uvm_pagerops aobj_pager = {
        .pgo_get = ao_get,
        .pgo_put = ao_put,
};

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
        struct vm_page *pg;

        pg = rumpuser_malloc(sizeof(struct vm_page), 0);
        memset(pg, 0, sizeof(struct vm_page));
        pg->offset = off;
        pg->uobject = uobj;

        pg->uanon = (void *)rumpuser_malloc(PAGE_SIZE, 0);
        memset((void *)pg->uanon, 0, PAGE_SIZE);
        pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

        TAILQ_INSERT_TAIL(&uobj->memq, pg, listq);

        return pg;
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
        struct uvm_object *uobj = pg->uobject;

        if (pg->flags & PG_WANTED)
                wakeup(pg);

        TAILQ_REMOVE(&uobj->memq, pg, listq);
        rumpuser_free((void *)pg->uanon);
        rumpuser_free(pg);
}

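/*
 * The rumpva list records which host virtual address holds the backing
 * storage of which vm_page, so that uvm_pageratop() can translate a
 * pager-style address back to its page.  Entries are added with
 * rumpvm_enterva() and purged wholesale by rumpvm_flushva().
 */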
struct rumpva {
        vaddr_t addr;
        struct vm_page *pg;

        LIST_ENTRY(rumpva) entries;
};
static LIST_HEAD(, rumpva) rvahead = LIST_HEAD_INITIALIZER(rvahead);
static kmutex_t rvamtx;

void
rumpvm_enterva(vaddr_t addr, struct vm_page *pg)
{
        struct rumpva *rva;

        rva = rumpuser_malloc(sizeof(struct rumpva), 0);
        rva->addr = addr;
        rva->pg = pg;
        mutex_enter(&rvamtx);
        LIST_INSERT_HEAD(&rvahead, rva, entries);
        mutex_exit(&rvamtx);
}

void
rumpvm_flushva(void)
{
        struct rumpva *rva;

        mutex_enter(&rvamtx);
        while ((rva = LIST_FIRST(&rvahead)) != NULL) {
                LIST_REMOVE(rva, entries);
                rumpuser_free(rva);
        }
        mutex_exit(&rvamtx);
}

/*
 * vnode pager
 */

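/*
 * The vnode pager simply trampolines into the vnode operations: the
 * object lock is the vnode interlock, which VOP_GETPAGES/VOP_PUTPAGES
 * expect to be held on entry and release themselves.
 */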
static int
vn_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
        int *npages, int centeridx, vm_prot_t access_type,
        int advice, int flags)
{
        struct vnode *vp = (struct vnode *)uobj;

        mutex_enter(&vp->v_interlock);
        return VOP_GETPAGES(vp, off, pgs, npages, centeridx, access_type,
            advice, flags);
}

static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
        struct vnode *vp = (struct vnode *)uobj;

        mutex_enter(&vp->v_interlock);
        return VOP_PUTPAGES(vp, offlo, offhi, flags);
}

/*
 * Anon object stuff
 */

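/*
 * The anon pager has no backing store: ao_get() returns a page from
 * the object's memq if one exists and creates a fresh zero-filled page
 * otherwise, while ao_put() only knows how to throw every page away.
 */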
static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
        int *npages, int centeridx, vm_prot_t access_type,
        int advice, int flags)
{
        struct vm_page *pg;
        int i;

        if (centeridx)
                panic("%s: centeridx != 0 not supported", __func__);

        /* loop over pages */
        off = trunc_page(off);
        for (i = 0; i < *npages; i++) {
 retrylookup:
                pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
                if (pg) {
                        if (pg->flags & PG_BUSY) {
                                pg->flags |= PG_WANTED;
                                UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
                                    "aogetpg", 0);
                                goto retrylookup;
                        }
                        pg->flags |= PG_BUSY;
                        pgs[i] = pg;
                } else {
                        pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
                        pgs[i] = pg;
                }
        }
        mutex_exit(&uobj->vmobjlock);

        return 0;
}

static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
        struct vm_page *pg;

        /* we only free all pages for now */
        if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
                mutex_exit(&uobj->vmobjlock);
                return 0;
        }

        while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
                uvm_pagefree(pg);
        mutex_exit(&uobj->vmobjlock);

        return 0;
}

struct uvm_object *
uao_create(vsize_t size, int flags)
{
        struct uvm_object *uobj;

        uobj = rumpuser_malloc(sizeof(struct uvm_object), 0);
        memset(uobj, 0, sizeof(struct uvm_object));
        uobj->pgops = &aobj_pager;
        TAILQ_INIT(&uobj->memq);
        mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

        return uobj;
}

void
uao_detach(struct uvm_object *uobj)
{

        /* ao_put() expects the object locked and drops the lock itself */
        mutex_enter(&uobj->vmobjlock);
        ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
        rumpuser_free(uobj);
}

/*
 * UBC
 */

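/*
 * UBC emulation: instead of mapping windows into kernel virtual
 * memory, ubc_alloc() hands out a plain malloc'd buffer and records it
 * as a ubc_window.  The uiomove replacement below recognizes addresses
 * inside such a window and copies directly between the uio and the
 * pages of the backing uvm_object.
 */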
struct ubc_window {
        struct uvm_object *uwin_obj;
        voff_t uwin_off;
        uint8_t *uwin_mem;
        size_t uwin_mapsize;

        LIST_ENTRY(ubc_window) uwin_entries;
};

static LIST_HEAD(, ubc_window) uwinlst = LIST_HEAD_INITIALIZER(uwinlst);
static kmutex_t uwinmtx;

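/*
 * Copy between a uio and the object behind a ubc window.  If uwinp is
 * NULL, the window is looked up from va; when va is not a window
 * address the routine returns 0 and the caller must fall back to a
 * regular uiomove.  Returns 1 if the transfer was handled here, with
 * the pager status stored via rvp.
 */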
int
rump_ubc_magic_uiomove(void *va, size_t n, struct uio *uio, int *rvp,
        struct ubc_window *uwinp)
{
        struct vm_page **pgs;
        int npages = len2npages(uio->uio_offset, n);
        size_t allocsize;
        int i, rv;

        if (uwinp == NULL) {
                mutex_enter(&uwinmtx);
                LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
                        if ((uint8_t *)va >= uwinp->uwin_mem
                            && (uint8_t *)va
                              < (uwinp->uwin_mem + uwinp->uwin_mapsize))
                                break;
                mutex_exit(&uwinmtx);
                if (uwinp == NULL) {
                        KASSERT(rvp != NULL);
                        return 0;
                }
        }

        allocsize = npages * sizeof(struct vm_page *);
        pgs = kmem_zalloc(allocsize, KM_SLEEP);
        rv = uwinp->uwin_obj->pgops->pgo_get(uwinp->uwin_obj,
            uwinp->uwin_off + ((uint8_t *)va - uwinp->uwin_mem),
            pgs, &npages, 0, 0, 0, 0);
        if (rv)
                goto out;

        for (i = 0; i < npages; i++) {
                size_t xfersize;
                off_t pageoff;

                pageoff = uio->uio_offset & PAGE_MASK;
                xfersize = MIN(MIN(n, PAGE_SIZE), PAGE_SIZE-pageoff);
                uiomove((uint8_t *)pgs[i]->uanon + pageoff, xfersize, uio);
                if (uio->uio_rw == UIO_WRITE)
                        pgs[i]->flags &= ~PG_CLEAN;
                n -= xfersize;
        }
        uvm_page_unbusy(pgs, npages);

 out:
        kmem_free(pgs, allocsize);
        if (rvp)
                *rvp = rv;
        return 1;
}

static struct ubc_window *
uwin_alloc(struct uvm_object *uobj, voff_t off, vsize_t len)
{
        struct ubc_window *uwinp; /* pronounced: you wimp! */

        uwinp = kmem_alloc(sizeof(struct ubc_window), KM_SLEEP);
        uwinp->uwin_obj = uobj;
        uwinp->uwin_off = off;
        uwinp->uwin_mapsize = len;
        uwinp->uwin_mem = kmem_alloc(len, KM_SLEEP);

        return uwinp;
}

static void
uwin_free(struct ubc_window *uwinp)
{

        kmem_free(uwinp->uwin_mem, uwinp->uwin_mapsize);
        kmem_free(uwinp, sizeof(struct ubc_window));
}

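/*
 * Create a window into uobj covering *lenp bytes from offset and
 * register it on the window list.  The backing pages are touched only
 * when rump_ubc_magic_uiomove() later operates on the window.
 */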
void *
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
        int flags)
{
        struct ubc_window *uwinp;

        uwinp = uwin_alloc(uobj, offset, *lenp);
        mutex_enter(&uwinmtx);
        LIST_INSERT_HEAD(&uwinlst, uwinp, uwin_entries);
        mutex_exit(&uwinmtx);

        DPRINTF(("UBC_ALLOC offset 0x%llx, uwin %p, mem %p\n",
            (unsigned long long)offset, uwinp, uwinp->uwin_mem));

        return uwinp->uwin_mem;
}

void
ubc_release(void *va, int flags)
{
        struct ubc_window *uwinp;

        mutex_enter(&uwinmtx);
        LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
                if ((uint8_t *)va >= uwinp->uwin_mem
                    && (uint8_t *)va < (uwinp->uwin_mem + uwinp->uwin_mapsize))
                        break;
        if (uwinp == NULL) {
                mutex_exit(&uwinmtx);
                panic("%s: releasing invalid window at %p", __func__, va);
        }

        /* remove under the lock to keep the list consistent */
        LIST_REMOVE(uwinp, uwin_entries);
        mutex_exit(&uwinmtx);

        uwin_free(uwinp);
}

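/*
 * Transfer "todo" bytes between the uio and uobj through a temporary
 * window.  XXX: an error from the underlying pager is currently not
 * propagated; the routine always reports success.
 */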
int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
        int advice, int flags)
{
        struct ubc_window *uwinp;
        vsize_t len;

        while (todo > 0) {
                len = todo;

                uwinp = uwin_alloc(uobj, uio->uio_offset, len);
                rump_ubc_magic_uiomove(uwinp->uwin_mem, len, uio, NULL, uwinp);
                uwin_free(uwinp);

                todo -= len;
        }
        return 0;
}


/*
 * Misc routines
 */

void
rumpvm_init(void)
{

        uvmexp.free = 1024*1024; /* XXX */
        uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */

        mutex_init(&rvamtx, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&uwinmtx, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);
}

void
uvm_pageactivate(struct vm_page *pg)
{

        /* nada */
}

void
uvm_pagewire(struct vm_page *pg)
{

        /* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

        /* nada */
}

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{

        panic("%s: unimplemented", __func__);
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
        struct vm_page *pg;

        TAILQ_FOREACH(pg, &uobj->memq, listq) {
                if (pg->offset == off) {
                        return pg;
                }
        }

        return NULL;
}

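/*
 * Translate a pager virtual address back to its vm_page using the
 * rumpva list filled in by rumpvm_enterva().
 */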
struct vm_page *
uvm_pageratop(vaddr_t va)
{
        struct rumpva *rva;

        mutex_enter(&rvamtx);
        LIST_FOREACH(rva, &rvahead, entries)
                if (rva->addr == va)
                        break;
        mutex_exit(&rvamtx);

        if (rva == NULL)
                panic("%s: va %llu", __func__, (unsigned long long)va);

        return rva->pg;
}

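/*
 * Clear PG_BUSY (and PG_WANTED, waking any waiters) on a batch of
 * pages; a trimmed-down version of the real UVM routine, with no
 * pageq handling and no PG_RELEASED support.
 */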
void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
        struct vm_page *pg;
        int i;

        for (i = 0; i < npgs; i++) {
                pg = pgs[i];
                if (pg == NULL)
                        continue;

                KASSERT(pg->flags & PG_BUSY);
                if (pg->flags & PG_WANTED)
                        wakeup(pg);
                pg->flags &= ~(PG_WANTED|PG_BUSY);
        }
}

void
uvm_estimatepageable(int *active, int *inactive)
{

        /* XXX: guessing game */
        *active = 1024;
        *inactive = 1024;
}

void
uvm_aio_biodone1(struct buf *bp)
{

        panic("%s: unimplemented", __func__);
}

void
uvm_aio_biodone(struct buf *bp)
{

        uvm_aio_aiodone(bp);
}

void
uvm_aio_aiodone(struct buf *bp)
{

        if (((bp->b_flags | bp->b_cflags) & (B_READ | BC_NOCACHE)) == 0 &&
            bioopsp)
                bioopsp->io_pageiodone(bp);
}

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{

        vp->v_size = vp->v_writesize = newsize;
}

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

        vp->v_writesize = newsize;
}

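/*
 * Zero out the byte range [off, off+len) of a vnode by pulling in the
 * affected pages through the pager and memset()ing them; the pages are
 * marked dirty so the zeroes eventually reach the backing store.
 */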
void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
        struct uvm_object *uobj = &vp->v_uobj;
        struct vm_page **pgs;
        int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
        int rv, npages, i;

        pgs = kmem_zalloc(maxpages * sizeof(struct vm_page *), KM_SLEEP);
        while (len) {
                npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
                memset(pgs, 0, npages * sizeof(struct vm_page *));
                mutex_enter(&uobj->vmobjlock);
                rv = uobj->pgops->pgo_get(uobj, off, pgs, &npages, 0, 0, 0, 0);
                KASSERT(npages > 0);

                for (i = 0; i < npages; i++) {
                        uint8_t *start;
                        size_t chunkoff, chunklen;

                        chunkoff = off & PAGE_MASK;
                        chunklen = MIN(PAGE_SIZE - chunkoff, len);
                        start = (uint8_t *)pgs[i]->uanon + chunkoff;

                        memset(start, 0, chunklen);
                        pgs[i]->flags &= ~PG_CLEAN;

                        off += chunklen;
                        len -= chunklen;
                }
                uvm_page_unbusy(pgs, npages);
        }
        kmem_free(pgs, maxpages * sizeof(struct vm_page *));

        return;
}

struct uvm_ractx *
uvm_ra_allocctx(void)
{

        return NULL;
}

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

        return;
}

bool
uvn_clean_p(struct uvm_object *uobj)
{
        struct vnode *vp = (void *)uobj;

        return (vp->v_iflag & VI_ONWORKLST) == 0;
}

/*
 * Kmem
 */

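/*
 * kmem is a thin shim over rumpuser_malloc()/rumpuser_free(); only the
 * KM_NOSLEEP flag is communicated to the host allocator.
 */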
void *
kmem_alloc(size_t size, km_flag_t kmflag)
{

        return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
}

void *
kmem_zalloc(size_t size, km_flag_t kmflag)
{
        void *rv;

        rv = kmem_alloc(size, kmflag);
        if (rv)
                memset(rv, 0, size);

        return rv;
}

void
kmem_free(void *p, size_t size)
{

        rumpuser_free(p);
}

/*
 * UVM km
 */

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
        void *rv;

        rv = rumpuser_malloc(size, flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT));
        if (rv && (flags & UVM_KMF_ZERO))
                memset(rv, 0, size);

        return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

        rumpuser_free((void *)vaddr);
}

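/*
 * Submap creation is not emulated; hand back an arbitrary non-NULL
 * cookie, which callers are presumably only going to pass around and
 * never dereference.
 */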
struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
        vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

        return (struct vm_map *)417416;
}

void
uvm_pageout_start(int npages)
{

        uvmexp.paging += npages;
}

void
uvm_pageout_done(int npages)
{

        uvmexp.paging -= npages;

        /*
         * wake up either of pagedaemon or LWPs waiting for it.
         */

        if (uvmexp.free <= uvmexp.reserve_kernel) {
                wakeup(&uvm.pagedaemon);
        } else {
                wakeup(&uvmexp.free);
        }
}