/*	$NetBSD: vm.c,v 1.42 2008/11/19 14:10:49 pooka Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.  Contents:
 *  + UBC
 *  + anon objects & pager
 *  + vnode objects & pager
 *  + misc support routines
 *  + kmem
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */
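
/*
 * In practice this means that whenever the code below needs to touch
 * the contents of a page, it casts pg->uanon back to a pointer, e.g.
 * (uint8_t *)pg->uanon in rump_ubc_magic_uiomove() and
 * uvm_vnp_zerorange() below.
 */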

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/kmem.h>

#include <machine/pmap.h>

#include <rump/rumpuser.h>

#include <uvm/uvm.h>
#include <uvm/uvm_prot.h>

#include "rump_private.h"

/* dumdidumdum */
#define len2npages(off, len) \
        (((((len) + PAGE_MASK) & ~(PAGE_MASK)) >> PAGE_SHIFT) \
            + ((((off) & PAGE_MASK) + ((len) & PAGE_MASK)) > PAGE_SIZE))
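
/*
 * len2npages() above gives the number of pages a transfer of "len" bytes
 * starting at offset "off" touches: len rounded up to full pages, plus
 * one more page if the sub-page parts of off and len together cross an
 * extra page boundary.  For example, with 4096-byte pages,
 * len2npages(0x0ff0, 0x30) is 1 + 1 = 2, since 0x30 bytes starting at
 * page offset 0xff0 straddle two pages.
 */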

static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
        int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

const struct uvm_pagerops aobj_pager = {
        .pgo_get = ao_get,
        .pgo_put = ao_put,
};

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;
const struct rb_tree_ops uvm_page_tree_ops;

static struct vm_map_kernel kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store.vmk_map;

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
        struct vm_page *pg;

        pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
        pg->offset = off;
        pg->uobject = uobj;

        pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
        pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

        TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);

        return pg;
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
        struct uvm_object *uobj = pg->uobject;

        if (pg->flags & PG_WANTED)
                wakeup(pg);

        TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
        kmem_free((void *)pg->uanon, PAGE_SIZE);
        kmem_free(pg, sizeof(*pg));
}

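/*
 * Bookkeeping for virtual addresses handed out for page mappings:
 * each rumpva records which vm_page backs a given address, so that
 * uvm_pageratop() can translate an address back to its page.
 * The list is protected by rvamtx.
 */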
struct rumpva {
        vaddr_t addr;
        struct vm_page *pg;

        LIST_ENTRY(rumpva) entries;
};
static LIST_HEAD(, rumpva) rvahead = LIST_HEAD_INITIALIZER(rvahead);
static kmutex_t rvamtx;

void
rumpvm_enterva(vaddr_t addr, struct vm_page *pg)
{
        struct rumpva *rva;

        rva = kmem_alloc(sizeof(struct rumpva), KM_SLEEP);
        rva->addr = addr;
        rva->pg = pg;
        mutex_enter(&rvamtx);
        LIST_INSERT_HEAD(&rvahead, rva, entries);
        mutex_exit(&rvamtx);
}

void
rumpvm_flushva()
{
        struct rumpva *rva;

        mutex_enter(&rvamtx);
        while ((rva = LIST_FIRST(&rvahead)) != NULL) {
                LIST_REMOVE(rva, entries);
                kmem_free(rva, sizeof(*rva));
        }
        mutex_exit(&rvamtx);
}

/*
 * Anon object stuff
 */

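/*
 * Fetch pages from an anon object.  Missing pages are created on
 * demand as zero-filled memory by rumpvm_makepage(); pages already
 * present are marked busy for the caller.  Expects the object locked
 * on entry and releases the lock before returning, like a pager
 * pgo_get routine.
 */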
static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
        int *npages, int centeridx, vm_prot_t access_type,
        int advice, int flags)
{
        struct vm_page *pg;
        int i;

        if (centeridx)
                panic("%s: centeridx != 0 not supported", __func__);

        /* loop over pages */
        off = trunc_page(off);
        for (i = 0; i < *npages; i++) {
 retrylookup:
                pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
                if (pg) {
                        if (pg->flags & PG_BUSY) {
                                pg->flags |= PG_WANTED;
                                UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
                                    "aogetpg", 0);
                                goto retrylookup;
                        }
                        pg->flags |= PG_BUSY;
                        pgs[i] = pg;
                } else {
                        pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
                        pgs[i] = pg;
                }
        }
        mutex_exit(&uobj->vmobjlock);

        return 0;
}

static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
        struct vm_page *pg;

        /* we only free all pages for now */
        if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
                mutex_exit(&uobj->vmobjlock);
                return 0;
        }

        while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
                uvm_pagefree(pg);
        mutex_exit(&uobj->vmobjlock);

        return 0;
}

struct uvm_object *
uao_create(vsize_t size, int flags)
{
        struct uvm_object *uobj;

        uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
        uobj->pgops = &aobj_pager;
        TAILQ_INIT(&uobj->memq);
        mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

        return uobj;
}

void
uao_detach(struct uvm_object *uobj)
{

        mutex_enter(&uobj->vmobjlock);
        ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
        kmem_free(uobj, sizeof(*uobj));
}

/*
 * UBC
 */

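/*
 * UBC emulation: ubc_alloc() does not map the object's pages anywhere.
 * It hands out a plain anonymous buffer and records the window on
 * uwinlst.  Data copied through such a window is meant to be diverted
 * to rump_ubc_magic_uiomove(), which looks the address up on uwinlst,
 * fetches the object's pages with pgo_get() and moves the data
 * directly between the uio and the page storage (pg->uanon).
 *
 * A caller-side sketch (hypothetical caller; advice and flags are
 * ignored by this emulation):
 *
 *	win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytes, advice, flags);
 *	error = uiomove(win, bytes, uio);
 *	ubc_release(win, 0);
 */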
struct ubc_window {
        struct uvm_object *uwin_obj;
        voff_t uwin_off;
        uint8_t *uwin_mem;
        size_t uwin_mapsize;

        LIST_ENTRY(ubc_window) uwin_entries;
};

static LIST_HEAD(, ubc_window) uwinlst = LIST_HEAD_INITIALIZER(uwinlst);
static kmutex_t uwinmtx;

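/*
 * Do the I/O for a UBC window.  If uwinp is NULL, the window is looked
 * up by the address va; if va does not belong to any window, return 0
 * so that the caller can fall back to a regular copy.  Otherwise
 * perform the transfer and return 1, storing the result in *rvp if
 * rvp is given.
 */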
int
rump_ubc_magic_uiomove(void *va, size_t n, struct uio *uio, int *rvp,
        struct ubc_window *uwinp)
{
        struct vm_page **pgs;
        int npages = len2npages(uio->uio_offset, n);
        size_t allocsize;
        int i, rv;

        if (uwinp == NULL) {
                mutex_enter(&uwinmtx);
                LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
                        if ((uint8_t *)va >= uwinp->uwin_mem
                            && (uint8_t *)va
                              < (uwinp->uwin_mem + uwinp->uwin_mapsize))
                                break;
                mutex_exit(&uwinmtx);
                if (uwinp == NULL) {
                        KASSERT(rvp != NULL);
                        return 0;
                }
        }

        allocsize = npages * sizeof(*pgs);
        pgs = kmem_zalloc(allocsize, KM_SLEEP);
        mutex_enter(&uwinp->uwin_obj->vmobjlock);
        rv = uwinp->uwin_obj->pgops->pgo_get(uwinp->uwin_obj,
            uwinp->uwin_off + ((uint8_t *)va - uwinp->uwin_mem),
            pgs, &npages, 0, 0, 0, 0);
        if (rv)
                goto out;

        for (i = 0; i < npages; i++) {
                size_t xfersize;
                off_t pageoff;

                pageoff = uio->uio_offset & PAGE_MASK;
                xfersize = MIN(MIN(n, PAGE_SIZE), PAGE_SIZE-pageoff);
                uiomove((uint8_t *)pgs[i]->uanon + pageoff, xfersize, uio);
                if (uio->uio_rw == UIO_WRITE)
                        pgs[i]->flags &= ~PG_CLEAN;
                n -= xfersize;
        }
        uvm_page_unbusy(pgs, npages);

 out:
        kmem_free(pgs, allocsize);
        if (rvp)
                *rvp = rv;
        return 1;
}

static struct ubc_window *
uwin_alloc(struct uvm_object *uobj, voff_t off, vsize_t len)
{
        struct ubc_window *uwinp; /* pronounced: you wimp! */

        uwinp = kmem_alloc(sizeof(struct ubc_window), KM_SLEEP);
        uwinp->uwin_obj = uobj;
        uwinp->uwin_off = off;
        uwinp->uwin_mapsize = len;
        uwinp->uwin_mem = kmem_alloc(len, KM_SLEEP);

        return uwinp;
}

static void
uwin_free(struct ubc_window *uwinp)
{

        kmem_free(uwinp->uwin_mem, uwinp->uwin_mapsize);
        kmem_free(uwinp, sizeof(struct ubc_window));
}

void *
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
        int flags)
{
        struct ubc_window *uwinp;

        uwinp = uwin_alloc(uobj, offset, *lenp);
        mutex_enter(&uwinmtx);
        LIST_INSERT_HEAD(&uwinlst, uwinp, uwin_entries);
        mutex_exit(&uwinmtx);

        DPRINTF(("UBC_ALLOC offset 0x%llx, uwin %p, mem %p\n",
            (unsigned long long)offset, uwinp, uwinp->uwin_mem));

        return uwinp->uwin_mem;
}

void
ubc_release(void *va, int flags)
{
        struct ubc_window *uwinp;

        mutex_enter(&uwinmtx);
        LIST_FOREACH(uwinp, &uwinlst, uwin_entries)
                if ((uint8_t *)va >= uwinp->uwin_mem
                    && (uint8_t *)va < (uwinp->uwin_mem + uwinp->uwin_mapsize))
                        break;
        if (uwinp == NULL)
                panic("%s: releasing invalid window at %p", __func__, va);

        /* unlink under the list lock before freeing */
        LIST_REMOVE(uwinp, uwin_entries);
        mutex_exit(&uwinmtx);

        uwin_free(uwinp);
}

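/*
 * Move data between a uio and the object's pages without going through
 * a long-lived window: set up a temporary window for the remaining
 * length, do the transfer and tear the window down again.
 */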
int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
        int advice, int flags)
{
        struct ubc_window *uwinp;
        vsize_t len;

        while (todo > 0) {
                len = todo;

                uwinp = uwin_alloc(uobj, uio->uio_offset, len);
                rump_ubc_magic_uiomove(uwinp->uwin_mem, len, uio, NULL, uwinp);
                uwin_free(uwinp);

                todo -= len;
        }
        return 0;
}

/*
 * Misc routines
 */

void
rumpvm_init()
{

        uvmexp.free = 1024*1024; /* XXX */
        uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
        rump_vmspace.vm_map.pmap = pmap_kernel();

        mutex_init(&rvamtx, MUTEX_DEFAULT, 0);
        mutex_init(&uwinmtx, MUTEX_DEFAULT, 0);
        mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);

        callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
}

void
uvm_pageactivate(struct vm_page *pg)
{

        /* nada */
}

void
uvm_pagewire(struct vm_page *pg)
{

        /* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

        /* nada */
}

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{

        panic("%s: unimplemented", __func__);
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
        struct vm_page *pg;

        TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
                if (pg->offset == off) {
                        return pg;
                }
        }

        return NULL;
}

struct vm_page *
uvm_pageratop(vaddr_t va)
{
        struct rumpva *rva;

        mutex_enter(&rvamtx);
        LIST_FOREACH(rva, &rvahead, entries)
                if (rva->addr == va)
                        break;
        mutex_exit(&rvamtx);

        if (rva == NULL)
                panic("%s: va %llu", __func__, (unsigned long long)va);

        return rva->pg;
}

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
        struct vm_page *pg;
        int i;

        for (i = 0; i < npgs; i++) {
                pg = pgs[i];
                if (pg == NULL)
                        continue;

                KASSERT(pg->flags & PG_BUSY);
                if (pg->flags & PG_WANTED)
                        wakeup(pg);
                if (pg->flags & PG_RELEASED)
                        uvm_pagefree(pg);
                else
                        pg->flags &= ~(PG_WANTED|PG_BUSY);
        }
}

void
uvm_estimatepageable(int *active, int *inactive)
{

        /* XXX: guessing game */
        *active = 1024;
        *inactive = 1024;
}

void
uvm_vnp_setsize(struct vnode *vp, voff_t newsize)
{

        mutex_enter(&vp->v_interlock);
        vp->v_size = vp->v_writesize = newsize;
        mutex_exit(&vp->v_interlock);
}

void
uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
{

        mutex_enter(&vp->v_interlock);
        vp->v_writesize = newsize;
        mutex_exit(&vp->v_interlock);
}

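/*
 * Zero out [off, off+len) in the vnode's pages: fetch the pages in
 * batches of at most 32 with pgo_get(), memset() the affected part of
 * each page's storage to zero and mark the pages dirty.
 */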
void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
        struct uvm_object *uobj = &vp->v_uobj;
        struct vm_page **pgs;
        int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
        int rv, npages, i;

        pgs = kmem_zalloc(maxpages * sizeof(*pgs), KM_SLEEP);
        while (len) {
                npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
                memset(pgs, 0, npages * sizeof(struct vm_page *));
                mutex_enter(&uobj->vmobjlock);
                rv = uobj->pgops->pgo_get(uobj, off, pgs, &npages, 0, 0, 0, 0);
                KASSERT(npages > 0);

                for (i = 0; i < npages; i++) {
                        uint8_t *start;
                        size_t chunkoff, chunklen;

                        chunkoff = off & PAGE_MASK;
                        chunklen = MIN(PAGE_SIZE - chunkoff, len);
                        start = (uint8_t *)pgs[i]->uanon + chunkoff;

                        memset(start, 0, chunklen);
                        pgs[i]->flags &= ~PG_CLEAN;

                        off += chunklen;
                        len -= chunklen;
                }
                uvm_page_unbusy(pgs, npages);
        }
        kmem_free(pgs, maxpages * sizeof(*pgs));

        return;
}

struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{

        return (struct vm_map_kernel *)map;
}

bool
vm_map_starved_p(struct vm_map *map)
{

        return false;
}

void
uvm_pageout_start(int npages)
{

        uvmexp.paging += npages;
}

void
uvm_pageout_done(int npages)
{

        uvmexp.paging -= npages;

        /*
         * Wake up either the pagedaemon or the LWPs waiting for it.
         */

        if (uvmexp.free <= uvmexp.reserve_kernel) {
                wakeup(&uvm.pagedaemon);
        } else {
                wakeup(&uvmexp.free);
        }
}

/* XXX: following two are unfinished because lwps are not refcounted yet */
void
uvm_lwp_hold(struct lwp *l)
{

        atomic_inc_uint(&l->l_holdcnt);
}

void
uvm_lwp_rele(struct lwp *l)
{

        atomic_dec_uint(&l->l_holdcnt);
}

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{

        panic("%s: unimplemented", __func__);
}

void
uvm_unloan(void *v, int npages, int flags)
{

        panic("%s: unimplemented", __func__);
}

/*
 * Kmem
 */

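/*
 * When the real kmem allocator is not compiled in, back kmem_alloc()
 * and friends directly with host memory via rumpuser_malloc()/free().
 */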
#ifndef RUMP_USE_REAL_KMEM
void *
kmem_alloc(size_t size, km_flag_t kmflag)
{

        return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
}

void *
kmem_zalloc(size_t size, km_flag_t kmflag)
{
        void *rv;

        rv = kmem_alloc(size, kmflag);
        if (rv)
                memset(rv, 0, size);

        return rv;
}

void
kmem_free(void *p, size_t size)
{

        rumpuser_free(p);
}
#endif /* RUMP_USE_REAL_KMEM */

/*
 * UVM km
 */

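/*
 * kernel_map allocations are likewise backed directly by host memory
 * via rumpuser_malloc(); the map and alignment arguments are ignored.
 */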
vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
        void *rv;

        rv = rumpuser_malloc(size, flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT));
        if (rv && flags & UVM_KMF_ZERO)
                memset(rv, 0, size);

        return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

        rumpuser_free((void *)vaddr);
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
        vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

        return (struct vm_map *)417416;
}

vaddr_t
uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
{

        return (vaddr_t)rumpuser_malloc(PAGE_SIZE, !waitok);
}

void
uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
{

        rumpuser_free((void *)addr);
}