/*	$NetBSD: vm.c,v 1.29 2008/01/27 20:10:53 pooka Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.  Contents:
 *  + UBC
 *  + anon objects & pager
 *  + vnode objects & pager
 *  + misc support routines
 *  + kmem
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */
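/*
 * In other words, the backing memory of a page is reached simply as
 *
 *	void *mem = (void *)pg->uanon;
 *
 * which is how e.g. uvm_pagefree() below gets at it.
 */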

#include <sys/param.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/kmem.h>

#include <uvm/uvm.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include <machine/pmap.h>

#include "rump_private.h"
#include "rumpuser.h"

/*
 * Number of pages needed to back a transfer of len bytes starting at
 * offset off, i.e. the pages in [trunc_page(off), round_page(off+len)).
 * (The previous formulation undercounted when len was page-aligned but
 * off was not.)
 */
#define len2npages(off, len)						\
	((round_page((off) + (len)) - trunc_page(off)) >> PAGE_SHIFT)
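/*
 * For example, with 4k pages a transfer of 0x1000 bytes starting at
 * offset 0x1800 straddles a page boundary, so len2npages(0x1800, 0x1000)
 * yields 2, while len2npages(0, 0x1000) yields 1.
 */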

static int vn_get(struct uvm_object *, voff_t, struct vm_page **,
    int *, int, vm_prot_t, int, int);
static int vn_put(struct uvm_object *, voff_t, voff_t, int);
static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
    int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

const struct uvm_pagerops uvm_vnodeops = {
	.pgo_get = vn_get,
	.pgo_put = vn_put,
};
const struct uvm_pagerops aobj_pager = {
	.pgo_get = ao_get,
	.pgo_put = ao_put,
};

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
	pg->offset = off;
	pg->uobject = uobj;

	pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq);

	return pg;
}
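/*
 * A typical caller pattern (sketch; cf. ao_get() below):
 *
 *	mutex_enter(&uobj->vmobjlock);
 *	pg = uvm_pagelookup(uobj, off);
 *	if (pg == NULL)
 *		pg = rumpvm_makepage(uobj, off);
 *	mutex_exit(&uobj->vmobjlock);
 */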

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	TAILQ_REMOVE(&uobj->memq, pg, listq);
	kmem_free((void *)pg->uanon, PAGE_SIZE);
	kmem_free(pg, sizeof(*pg));
}

struct rumpva {
	vaddr_t addr;
	struct vm_page *pg;

	LIST_ENTRY(rumpva) entries;
};
static LIST_HEAD(, rumpva) rvahead = LIST_HEAD_INITIALIZER(rvahead);
static kmutex_t rvamtx;

void
rumpvm_enterva(vaddr_t addr, struct vm_page *pg)
{
	struct rumpva *rva;

	rva = kmem_alloc(sizeof(struct rumpva), KM_SLEEP);
	rva->addr = addr;
	rva->pg = pg;
	mutex_enter(&rvamtx);
	LIST_INSERT_HEAD(&rvahead, rva, entries);
	mutex_exit(&rvamtx);
}

void
rumpvm_flushva(void)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	while ((rva = LIST_FIRST(&rvahead)) != NULL) {
		LIST_REMOVE(rva, entries);
		kmem_free(rva, sizeof(*rva));
	}
	mutex_exit(&rvamtx);
}
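/*
 * Sketch of intended use: a pager records where it mapped a page, and
 * uvm_pageratop() below reverses the mapping until the next flush:
 *
 *	rumpvm_enterva(va, pg);
 *	...
 *	pg = uvm_pageratop(va);
 *	...
 *	rumpvm_flushva();
 */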

/*
 * vnode pager
 */

static int
vn_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_GETPAGES(vp, off, pgs, npages, centeridx, access_type,
	    advice, flags);
}

static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_PUTPAGES(vp, offlo, offhi, flags);
}

/*
 * Anon object stuff
 */

static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	/* we only free all pages for now */
	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
		uvm_pagefree(pg);
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

struct uvm_object *
uao_create(vsize_t size, int flags)
{
	struct uvm_object *uobj;

	uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
	uobj->pgops = &aobj_pager;
	TAILQ_INIT(&uobj->memq);
	mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

	return uobj;
}

void
uao_detach(struct uvm_object *uobj)
{

	mutex_enter(&uobj->vmobjlock);
	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
	kmem_free(uobj, sizeof(*uobj));
}
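/*
 * Lifecycle sketch for an anonymous object: uao_create() hands out an
 * object whose pages materialize on demand via ao_get(), and
 * uao_detach() frees the pages and the object:
 *
 *	struct uvm_object *uobj = uao_create(size, 0);
 *	...
 *	uao_detach(uobj);
 */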

/*
 * UBC
 */

struct ubc_window {
	struct uvm_object *uwin_obj;
	voff_t uwin_off;
	uint8_t *uwin_mem;
	size_t uwin_mapsize;

	LIST_ENTRY(ubc_window) uwin_entries;
};

static LIST_HEAD(, ubc_window) uwinlst = LIST_HEAD_INITIALIZER(uwinlst);
static kmutex_t uwinmtx;

/*
 * Emulate uiomove() against a UBC window: if va points into a window
 * (or one is passed in directly), do the transfer against the window's
 * backing object and return 1, storing the pager status in *rvp if rvp
 * is non-NULL.  Otherwise return 0 and let the caller move the data
 * the normal way.
 */
int
rump_ubc_magic_uiomove(void *va, size_t n, struct uio *uio, int *rvp,
	struct ubc_window *uwinp)
{
	struct vm_page **pgs;
	int npages = len2npages(uio->uio_offset, n);
	size_t allocsize;
	int i, rv;

	if (uwinp == NULL) {
		mutex_enter(&uwinmtx);
		LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
			if ((uint8_t *)va >= uwinp->uwin_mem
			    && (uint8_t *)va
			      < (uwinp->uwin_mem + uwinp->uwin_mapsize))
				break;
		mutex_exit(&uwinmtx);
		if (uwinp == NULL) {
			KASSERT(rvp != NULL);
			return 0;
		}
	}

	allocsize = npages * sizeof(*pgs);
	pgs = kmem_zalloc(allocsize, KM_SLEEP);
	mutex_enter(&uwinp->uwin_obj->vmobjlock);
	rv = uwinp->uwin_obj->pgops->pgo_get(uwinp->uwin_obj,
	    uwinp->uwin_off + ((uint8_t *)va - uwinp->uwin_mem),
	    pgs, &npages, 0, 0, 0, 0);
	if (rv)
		goto out;

	for (i = 0; i < npages; i++) {
		size_t xfersize;
		off_t pageoff;

		pageoff = uio->uio_offset & PAGE_MASK;
		xfersize = MIN(MIN(n, PAGE_SIZE), PAGE_SIZE-pageoff);
		uiomove((uint8_t *)pgs[i]->uanon + pageoff, xfersize, uio);
		if (uio->uio_rw == UIO_WRITE)
			pgs[i]->flags &= ~PG_CLEAN;
		n -= xfersize;
	}
	uvm_page_unbusy(pgs, npages);

 out:
	kmem_free(pgs, allocsize);
	if (rvp)
		*rvp = rv;
	return 1;
}

static struct ubc_window *
uwin_alloc(struct uvm_object *uobj, voff_t off, vsize_t len)
{
	struct ubc_window *uwinp; /* pronounced: you wimp! */

	uwinp = kmem_alloc(sizeof(struct ubc_window), KM_SLEEP);
	uwinp->uwin_obj = uobj;
	uwinp->uwin_off = off;
	uwinp->uwin_mapsize = len;
	uwinp->uwin_mem = kmem_alloc(len, KM_SLEEP);

	return uwinp;
}

static void
uwin_free(struct ubc_window *uwinp)
{

	kmem_free(uwinp->uwin_mem, uwinp->uwin_mapsize);
	kmem_free(uwinp, sizeof(struct ubc_window));
}

void *
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
	int flags)
{
	struct ubc_window *uwinp;

	uwinp = uwin_alloc(uobj, offset, *lenp);
	mutex_enter(&uwinmtx);
	LIST_INSERT_HEAD(&uwinlst, uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	DPRINTF(("UBC_ALLOC offset 0x%llx, uwin %p, mem %p\n",
	    (unsigned long long)offset, uwinp, uwinp->uwin_mem));

	return uwinp->uwin_mem;
}

void
ubc_release(void *va, int flags)
{
	struct ubc_window *uwinp;

	mutex_enter(&uwinmtx);
	LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
		if ((uint8_t *)va >= uwinp->uwin_mem
		    && (uint8_t *)va < (uwinp->uwin_mem + uwinp->uwin_mapsize))
			break;
	if (uwinp == NULL)
		panic("%s: releasing invalid window at %p", __func__, va);
	/* unlink while still holding the lock to avoid racing a lookup */
	LIST_REMOVE(uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	uwin_free(uwinp);
}

int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
	int advice, int flags)
{
	struct ubc_window *uwinp;
	vsize_t len;

	while (todo > 0) {
		len = todo;

		uwinp = uwin_alloc(uobj, uio->uio_offset, len);
		rump_ubc_magic_uiomove(uwinp->uwin_mem, len, uio, NULL, uwinp);
		uwin_free(uwinp);

		todo -= len;
	}
	return 0;
}
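/*
 * The classic UBC window cycle this emulates (sketch, as file systems
 * use it for read/write):
 *
 *	win = ubc_alloc(uobj, uio->uio_offset, &bytelen, advice, flags);
 *	error = uiomove(win, bytelen, uio);
 *	ubc_release(win, flags);
 *
 * where, presumably via rump's uiomove() checking for window addresses,
 * the middle step ends up in rump_ubc_magic_uiomove() above.
 */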

/*
 * Misc routines
 */

void
rumpvm_init(void)
{

	uvmexp.free = 1024*1024; /* XXX */
	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */

	mutex_init(&rvamtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uwinmtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);
}

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &uobj->memq, listq) {
		if (pg->offset == off) {
			return pg;
		}
	}

	return NULL;
}

struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	LIST_FOREACH(rva, &rvahead, entries)
		if (rva->addr == va)
			break;
	mutex_exit(&rvamtx);

	if (rva == NULL)
		panic("%s: va %llu", __func__, (unsigned long long)va);

	return rva->pg;
}

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

void
uvm_aio_biodone1(struct buf *bp)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_aio_biodone(struct buf *bp)
{

	uvm_aio_aiodone(bp);
}

void
uvm_aio_aiodone(struct buf *bp)
{

	if (((bp->b_flags | bp->b_cflags) & (B_READ | BC_NOCACHE)) == 0 &&
	    bioopsp)
		bioopsp->io_pageiodone(bp);
}

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{

	vp->v_size = vp->v_writesize = newsize;
}

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

	vp->v_writesize = newsize;
}

void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page **pgs;
	int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
	int rv, npages, i;

	pgs = kmem_zalloc(maxpages * sizeof(*pgs), KM_SLEEP);
	while (len) {
		npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
		memset(pgs, 0, npages * sizeof(struct vm_page *));
		mutex_enter(&uobj->vmobjlock);
		rv = uobj->pgops->pgo_get(uobj, off, pgs, &npages, 0, 0, 0, 0);
		KASSERT(npages > 0);

		for (i = 0; i < npages; i++) {
			uint8_t *start;
			size_t chunkoff, chunklen;

			chunkoff = off & PAGE_MASK;
			chunklen = MIN(PAGE_SIZE - chunkoff, len);
			start = (uint8_t *)pgs[i]->uanon + chunkoff;

			memset(start, 0, chunklen);
			/* zeroing modifies the page, so mark it dirty */
			pgs[i]->flags &= ~PG_CLEAN;

			off += chunklen;
			len -= chunklen;
		}
		uvm_page_unbusy(pgs, npages);
	}
	kmem_free(pgs, maxpages * sizeof(*pgs));
}

struct uvm_ractx *
uvm_ra_allocctx(void)
{

	return NULL;
}

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

	return;
}

bool
uvn_clean_p(struct uvm_object *uobj)
{
	struct vnode *vp = (void *)uobj;

	return (vp->v_iflag & VI_ONWORKLST) == 0;
}

/*
 * Kmem
 */

void *
kmem_alloc(size_t size, km_flag_t kmflag)
{

	return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
}

void *
kmem_zalloc(size_t size, km_flag_t kmflag)
{
	void *rv;

	rv = kmem_alloc(size, kmflag);
	if (rv)
		memset(rv, 0, size);

	return rv;
}

void
kmem_free(void *p, size_t size)
{

	rumpuser_free(p);
}

/*
 * UVM km
 */

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv;

	rv = rumpuser_malloc(size, flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT));
	if (rv && flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	rumpuser_free((void *)vaddr);
}
struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

	/* a fake but non-NULL cookie; nothing ever dereferences it */
	return (struct vm_map *)417416;
}

void
uvm_pageout_start(int npages)
{

	uvmexp.paging += npages;
}

void
uvm_pageout_done(int npages)
{

	uvmexp.paging -= npages;

	/*
	 * If we are below the kernel reserve, wake up the pagedaemon;
	 * otherwise wake up any LWPs waiting for free memory.
	 */
	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
	}
}