/*	$NetBSD: vm.c,v 1.23 2007/11/06 12:57:50 pooka Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.  Contents:
 * + UBC
 * + anon objects & pager
 * + vnode objects & pager
 * + misc support routines
 * + kmem
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */

#include <sys/param.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/kmem.h>

#include <uvm/uvm.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include <machine/pmap.h>

#include "rump_private.h"
#include "rumpuser.h"

/* number of pages spanned by the byte range [off, off+len) */
#define len2npages(off, len) \
	((round_page((off) + (len)) - trunc_page((off))) >> PAGE_SHIFT)
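/*
 * e.g. with 4k pages, off = 0xf00 and len = 0x300 straddles a page
 * boundary: round_page(0x1200) == 0x2000 and trunc_page(0xf00) == 0,
 * so the macro yields two pages.
 */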

struct uvm_pagerops uvm_vnodeops;
struct uvm_pagerops aobj_pager;
struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = rumpuser_malloc(sizeof(struct vm_page), 0);
	memset(pg, 0, sizeof(struct vm_page));
	pg->offset = off;
	pg->uobject = uobj;

	pg->uanon = (void *)rumpuser_malloc(PAGE_SIZE, 0);
	memset((void *)pg->uanon, 0, PAGE_SIZE);
	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq);

	return pg;
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	TAILQ_REMOVE(&uobj->memq, pg, listq);
	rumpuser_free((void *)pg->uanon);
	rumpuser_free(pg);
}

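/*
 * Bookkeeping for the missing pmap: rumpvm_enterva() records which
 * vm_page backs a given virtual address, uvm_pageratop() does the
 * reverse lookup and rumpvm_flushva() discards all the mappings.
 */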
struct rumpva {
	vaddr_t addr;
	struct vm_page *pg;

	LIST_ENTRY(rumpva) entries;
};
static LIST_HEAD(, rumpva) rvahead = LIST_HEAD_INITIALIZER(rvahead);
static kmutex_t rvamtx;

void
rumpvm_enterva(vaddr_t addr, struct vm_page *pg)
{
	struct rumpva *rva;

	rva = rumpuser_malloc(sizeof(struct rumpva), 0);
	rva->addr = addr;
	rva->pg = pg;
	mutex_enter(&rvamtx);
	LIST_INSERT_HEAD(&rvahead, rva, entries);
	mutex_exit(&rvamtx);
}

void
rumpvm_flushva(void)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	while ((rva = LIST_FIRST(&rvahead)) != NULL) {
		LIST_REMOVE(rva, entries);
		rumpuser_free(rva);
	}
	mutex_exit(&rvamtx);
}

/*
 * vnode pager
 */
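
/*
 * Both methods simply forward the request to the vnode operations,
 * i.e. VOP_GETPAGES() and VOP_PUTPAGES() in the file system do the
 * real work.
 */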

static int
vn_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_GETPAGES(vp, off, pgs, npages, centeridx, access_type,
	    advice, flags);
}

static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

	return VOP_PUTPAGES(vp, offlo, offhi, flags);
}

/*
 * Anon object stuff
 */

static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	simple_unlock(&uobj->vmobjlock);

	return 0;
}

static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	/* we only free all pages for now */
	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
		simple_unlock(&uobj->vmobjlock);
		return 0;
	}

	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
		uvm_pagefree(pg);
	simple_unlock(&uobj->vmobjlock);

	return 0;
}

struct uvm_object *
uao_create(vsize_t size, int flags)
{
	struct uvm_object *uobj;

	uobj = rumpuser_malloc(sizeof(struct uvm_object), 0);
	memset(uobj, 0, sizeof(struct uvm_object));
	uobj->pgops = &aobj_pager;
	TAILQ_INIT(&uobj->memq);
	simple_lock_init(&uobj->vmobjlock);

	return uobj;
}

void
uao_detach(struct uvm_object *uobj)
{

	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
	rumpuser_free(uobj);
}

/*
 * UBC
 */
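
/*
 * Since rump has no real page mappings, ubc_alloc() cannot hand out a
 * window mapping the object's pages.  Instead, it returns a chunk of
 * anonymous memory and records it in uwinlst.  When the caller later
 * uiomove()s to or from that window, rump_ubc_magic_uiomove() notices
 * that the address belongs to a window and copies between the uio and
 * the pages of the window's backing uvm_object.
 *
 * For reference, a sketch of the caller's side, roughly what a file
 * system read/write path does with the standard UBC interface:
 *
 *	win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen, advice, 0);
 *	error = uiomove(win, bytelen, uio);
 *	ubc_release(win, 0);
 */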

struct ubc_window {
	struct uvm_object *uwin_obj;
	voff_t uwin_off;
	uint8_t *uwin_mem;
	size_t uwin_mapsize;

	LIST_ENTRY(ubc_window) uwin_entries;
};

static LIST_HEAD(, ubc_window) uwinlst = LIST_HEAD_INITIALIZER(uwinlst);
static kmutex_t uwinmtx;

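/*
 * Copy between a ubc window and a uio.  If va is a window address
 * (or uwinp is passed in directly), do the transfer, store the pager
 * error in *rvp if rvp is given, and return 1.  Return 0 if va does
 * not belong to any window, i.e. the transfer was not handled here.
 */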
int
rump_ubc_magic_uiomove(void *va, size_t n, struct uio *uio, int *rvp,
	struct ubc_window *uwinp)
{
	struct vm_page **pgs;
	int npages = len2npages(uio->uio_offset, n);
	size_t allocsize;
	int i, rv;

	if (uwinp == NULL) {
		mutex_enter(&uwinmtx);
		LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
			if ((uint8_t *)va >= uwinp->uwin_mem
			    && (uint8_t *)va
			      < (uwinp->uwin_mem + uwinp->uwin_mapsize))
				break;
		mutex_exit(&uwinmtx);
		if (uwinp == NULL) {
			KASSERT(rvp != NULL);
			return 0;
		}
	}

	allocsize = npages * sizeof(*pgs);
	pgs = kmem_zalloc(allocsize, KM_SLEEP);
	simple_lock(&uwinp->uwin_obj->vmobjlock);
	rv = uwinp->uwin_obj->pgops->pgo_get(uwinp->uwin_obj,
	    uwinp->uwin_off + ((uint8_t *)va - uwinp->uwin_mem),
	    pgs, &npages, 0, 0, 0, 0);
	if (rv)
		goto out;

	for (i = 0; i < npages; i++) {
		size_t xfersize;
		off_t pageoff;

		pageoff = uio->uio_offset & PAGE_MASK;
		xfersize = MIN(MIN(n, PAGE_SIZE), PAGE_SIZE-pageoff);
		uiomove((uint8_t *)pgs[i]->uanon + pageoff, xfersize, uio);
		if (uio->uio_rw == UIO_WRITE)
			pgs[i]->flags &= ~PG_CLEAN;
		n -= xfersize;
	}
	uvm_page_unbusy(pgs, npages);

 out:
	kmem_free(pgs, allocsize);
	if (rvp)
		*rvp = rv;
	return 1;
}

static struct ubc_window *
uwin_alloc(struct uvm_object *uobj, voff_t off, vsize_t len)
{
	struct ubc_window *uwinp; /* pronounced: you wimp! */

	uwinp = kmem_alloc(sizeof(struct ubc_window), KM_SLEEP);
	uwinp->uwin_obj = uobj;
	uwinp->uwin_off = off;
	uwinp->uwin_mapsize = len;
	uwinp->uwin_mem = kmem_alloc(len, KM_SLEEP);

	return uwinp;
}

static void
uwin_free(struct ubc_window *uwinp)
{

	kmem_free(uwinp->uwin_mem, uwinp->uwin_mapsize);
	kmem_free(uwinp, sizeof(struct ubc_window));
}


void *
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
	int flags)
{
	struct ubc_window *uwinp;

	uwinp = uwin_alloc(uobj, offset, *lenp);
	mutex_enter(&uwinmtx);
	LIST_INSERT_HEAD(&uwinlst, uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	DPRINTF(("UBC_ALLOC offset 0x%llx, uwin %p, mem %p\n",
	    (unsigned long long)offset, uwinp, uwinp->uwin_mem));

	return uwinp->uwin_mem;
}

void
ubc_release(void *va, int flags)
{
	struct ubc_window *uwinp;

	mutex_enter(&uwinmtx);
	LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
		if ((uint8_t *)va >= uwinp->uwin_mem
		    && (uint8_t *)va < (uwinp->uwin_mem + uwinp->uwin_mapsize))
			break;
	if (uwinp == NULL)
		panic("%s: releasing invalid window at %p", __func__, va);
	LIST_REMOVE(uwinp, uwin_entries);
	mutex_exit(&uwinmtx);

	uwin_free(uwinp);
}

int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
	int advice, int flags)
{
	struct ubc_window *uwinp;
	vsize_t len;

	while (todo > 0) {
		len = todo;

		uwinp = uwin_alloc(uobj, uio->uio_offset, len);
		rump_ubc_magic_uiomove(uwinp->uwin_mem, len, uio, NULL, uwinp);
		uwin_free(uwinp);

		todo -= len;
	}
	return 0;
}

/*
 * Misc routines
 */

void
rumpvm_init(void)
{

	uvm_vnodeops.pgo_get = vn_get;
	uvm_vnodeops.pgo_put = vn_put;
	aobj_pager.pgo_get = ao_get;
	aobj_pager.pgo_put = ao_put;

	uvmexp.free = 1024*1024; /* XXX */
	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */

	mutex_init(&rvamtx, MUTEX_DEFAULT, 0);
	mutex_init(&uwinmtx, MUTEX_DEFAULT, 0);
}

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &uobj->memq, listq) {
		if (pg->offset == off) {
			return pg;
		}
	}

	return NULL;
}

struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct rumpva *rva;

	mutex_enter(&rvamtx);
	LIST_FOREACH(rva, &rvahead, entries)
		if (rva->addr == va)
			break;
	mutex_exit(&rvamtx);

	if (rva == NULL)
		panic("%s: va %llu", __func__, (unsigned long long)va);

	return rva->pg;
}

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

void
uvm_aio_biodone1(struct buf *bp)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_aio_biodone(struct buf *bp)
{

	uvm_aio_aiodone(bp);
}

void
uvm_aio_aiodone(struct buf *bp)
{

	if ((bp->b_flags & (B_READ | B_NOCACHE)) == 0 && bioopsp)
		bioopsp->io_pageiodone(bp);
}

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{

	vp->v_size = vp->v_writesize = newsize;
}

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

	vp->v_writesize = newsize;
}

void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page **pgs;
	int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
	int rv, npages, i;

	pgs = kmem_zalloc(maxpages * sizeof(*pgs), KM_SLEEP);
	while (len) {
		npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
		memset(pgs, 0, npages * sizeof(struct vm_page *));
		simple_lock(&uobj->vmobjlock);
		rv = uobj->pgops->pgo_get(uobj, off, pgs, &npages, 0, 0, 0, 0);
		assert(npages > 0);

		for (i = 0; i < npages; i++) {
			uint8_t *start;
			size_t chunkoff, chunklen;

			chunkoff = off & PAGE_MASK;
			chunklen = MIN(PAGE_SIZE - chunkoff, len);
			start = (uint8_t *)pgs[i]->uanon + chunkoff;

			memset(start, 0, chunklen);
			pgs[i]->flags &= ~PG_CLEAN;

			off += chunklen;
			len -= chunklen;
		}
		uvm_page_unbusy(pgs, npages);
	}
	kmem_free(pgs, maxpages * sizeof(*pgs));

	return;
}

struct uvm_ractx *
uvm_ra_allocctx(void)
{

	return NULL;
}

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

	return;
}

bool
uvn_clean_p(struct uvm_object *uobj)
{
	struct vnode *vp = (void *)uobj;

	return (vp->v_iflag & VI_ONWORKLST) == 0;
}

/*
 * Kmem
 */

void *
kmem_alloc(size_t size, km_flag_t kmflag)
{

	return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
}

void *
kmem_zalloc(size_t size, km_flag_t kmflag)
{
	void *rv;

	rv = kmem_alloc(size, kmflag);
	if (rv)
		memset(rv, 0, size);

	return rv;
}

void
kmem_free(void *p, size_t size)
{

	rumpuser_free(p);
}

/*
 * UVM km
 */

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv;

	rv = rumpuser_malloc(size, flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT));
	if (rv && flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	rumpuser_free((void *)vaddr);
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

	return (struct vm_map *)417416;
}