/*	$NetBSD: vm.c,v 1.75 2010/05/26 21:48:20 pooka Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines. Contents:
 *  + anon objects & pager
 *  + misc support routines
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page. phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.75 2010/05/26 21:48:20 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>

#include <machine/pmap.h>

#include <rump/rumpuser.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include "rump_private.h"

static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

const struct uvm_pagerops aobj_pager = {
	.pgo_get = ao_get,
	.pgo_put = ao_put,
};

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;
static struct vm_map_kernel kmem_map_store;
struct vm_map *kmem_map = &kmem_map_store.vmk_map;
const struct rb_tree_ops uvm_page_tree_ops;

static struct vm_map_kernel kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store.vmk_map;

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
	pg->offset = off;
	pg->uobject = uobj;

	pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
	uobj->uo_npages++;

	return pg;
}

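/*
 * The allocation strategy arguments are irrelevant here: anon, flags,
 * strat and free_list are all ignored, and every page is simply
 * backed by host memory via rumpvm_makepage().
 */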
struct vm_page *
uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
	int flags, int strat, int free_list)
{

	return rumpvm_makepage(uobj, off);
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	uobj->uo_npages--;
	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
	kmem_free((void *)pg->uanon, PAGE_SIZE);
	kmem_free(pg, sizeof(*pg));
}

void
uvm_pagezero(struct vm_page *pg)
{

	pg->flags &= ~PG_CLEAN;
	memset((void *)pg->uanon, 0, PAGE_SIZE);
}

/*
 * Anon object stuff
 */

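/*
 * Pager get for anon objects. The pages have no backing store beyond
 * the memory allocated for them, so a lookup miss just means the page
 * does not exist yet and we create a fresh zeroed one. Called with
 * the object locked, returns with it unlocked.
 */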
static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

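/*
 * Pager put for anon objects. Only the "free everything" case
 * (PGO_FREE | PGO_ALLPAGES) does any work; all other requests are
 * no-ops. Called with the object locked, returns with it unlocked.
 */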
static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	/* we only free all pages for now */
	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
		uvm_pagefree(pg);
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

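/*
 * Create an anon object. The size and flags arguments are unused,
 * since pages are created lazily in ao_get().
 */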
struct uvm_object *
uao_create(vsize_t size, int flags)
{
	struct uvm_object *uobj;

	uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
	uobj->pgops = &aobj_pager;
	TAILQ_INIT(&uobj->memq);
	mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

	return uobj;
}

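/*
 * Detach from an anon object. Reference counting is not implemented,
 * so the object and all of its pages are freed on the first detach.
 */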
void
uao_detach(struct uvm_object *uobj)
{

	mutex_enter(&uobj->vmobjlock);
	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
	mutex_destroy(&uobj->vmobjlock);
	kmem_free(uobj, sizeof(*uobj));
}

/*
 * Misc routines
 */

static kmutex_t pagermtx;

void
rumpvm_init(void)
{

	uvmexp.free = 1024*1024; /* XXX */
	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
	rump_vmspace.vm_map.pmap = pmap_kernel();

	mutex_init(&pagermtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);

	kernel_map->pmap = pmap_kernel();
	callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
	kmem_map->pmap = pmap_kernel();
	callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM);
}


void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

/*
 * This satisfies the "disgusting mmap hack" used by proplib.
 * We probably should grow some more assertables to make sure we're
 * not satisfying anything we shouldn't be satisfying. At least we
 * should make sure it's the local machine we're mmapping ...
 */
int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim)
{
	void *uaddr;
	int error;

	if (prot != (VM_PROT_READ | VM_PROT_WRITE))
		panic("uvm_mmap() variant unsupported");
	if (flags != (MAP_PRIVATE | MAP_ANON))
		panic("uvm_mmap() variant unsupported");
	/* no reason in particular, but cf. uvm_default_mapaddr() */
	if (*addr != 0)
		panic("uvm_mmap() variant unsupported");

	uaddr = rumpuser_anonmmap(size, 0, 0, &error);
	if (uaddr == NULL)
		return error;

	*addr = (vaddr_t)uaddr;
	return 0;
}

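/*
 * Bookkeeping for a pager "mapping": pgr_kva is an allocated window
 * holding a copy of the page contents, pgr_pgs remembers the pages
 * backing the window, and pgr_read tells uvm_pagermapout() to copy
 * the window contents back to the pages.
 */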
struct pagerinfo {
	vaddr_t pgr_kva;
	int pgr_npages;
	struct vm_page **pgr_pgs;
	bool pgr_read;

	LIST_ENTRY(pagerinfo) pgr_entries;
};
static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);

/*
 * Pager "map" in routine. Instead of mapping, we allocate memory
 * and copy page contents there. Not optimal or even strictly
 * correct (the caller might modify the page contents after mapping
 * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK.
 */
vaddr_t
uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
{
	struct pagerinfo *pgri;
	vaddr_t curkva;
	int i;

	/* allocate structures */
	pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
	pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
	pgri->pgr_npages = npages;
	pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
	pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;

	/* copy contents to "mapped" memory */
	for (i = 0, curkva = pgri->pgr_kva;
	    i < npages;
	    i++, curkva += PAGE_SIZE) {
		/*
		 * We need to copy the previous contents of the pages to
		 * the window even if we are reading from the
		 * device, since the device might not fill the contents of
		 * the full mapped range and we will end up corrupting
		 * data when we unmap the window.
		 */
		memcpy((void *)curkva, pgs[i]->uanon, PAGE_SIZE);
		pgri->pgr_pgs[i] = pgs[i];
	}

	mutex_enter(&pagermtx);
	LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
	mutex_exit(&pagermtx);

	return pgri->pgr_kva;
}

/*
 * map out the pager window. return contents from VA to page storage
 * and free structures.
 *
 * Note: does not currently support partial frees
 */
void
uvm_pagermapout(vaddr_t kva, int npages)
{
	struct pagerinfo *pgri;
	vaddr_t curkva;
	int i;

	mutex_enter(&pagermtx);
	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
		if (pgri->pgr_kva == kva)
			break;
	}
	KASSERT(pgri);
	if (pgri->pgr_npages != npages)
		panic("uvm_pagermapout: partial unmapping not supported");
	LIST_REMOVE(pgri, pgr_entries);
	mutex_exit(&pagermtx);

	if (pgri->pgr_read) {
		for (i = 0, curkva = pgri->pgr_kva;
		    i < pgri->pgr_npages;
		    i++, curkva += PAGE_SIZE) {
			memcpy(pgri->pgr_pgs[i]->uanon,
			    (void *)curkva, PAGE_SIZE);
		}
	}

	kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
	kmem_free((void *)pgri->pgr_kva, npages * PAGE_SIZE);
	kmem_free(pgri, sizeof(*pgri));
}

/*
 * convert va in pager window to page structure.
 * XXX: how expensive is this (global lock, list traversal)?
 */
struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct pagerinfo *pgri;
	struct vm_page *pg = NULL;
	int i;

	mutex_enter(&pagermtx);
	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
		if (pgri->pgr_kva <= va
		    && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
			break;
	}
	if (pgri) {
		i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
		pg = pgri->pgr_pgs[i];
	}
	mutex_exit(&pagermtx);

	return pg;
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
		if (pg->offset == off) {
			return pg;
		}
	}

	return NULL;
}

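/*
 * Unbusy the given pages, typically after i/o: wake up anyone waiting
 * on them and free the ones which were released while busy.
 */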
void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		if (pg->flags & PG_RELEASED)
			uvm_pagefree(pg);
		else
			pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

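/*
 * The cast below is valid because vmk_map is the first member of
 * struct vm_map_kernel, so both structures start at the same address.
 */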
struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{

	return (struct vm_map_kernel *)map;
}

bool
vm_map_starved_p(struct vm_map *map)
{

	return false;
}

void
uvm_pageout_start(int npages)
{

	uvmexp.paging += npages;
}

void
uvm_pageout_done(int npages)
{

	uvmexp.paging -= npages;

	/*
	 * wake up either of pagedaemon or LWPs waiting for it.
	 */

	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
	}
}

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_unloan(void *v, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
	struct vm_page **opp)
{

	return EBUSY;
}

#ifdef DEBUGPRINT
void
uvm_object_printit(struct uvm_object *uobj, bool full,
	void (*pr)(const char *, ...))
{

	pr("VM OBJECT at %p, refs %d", uobj, uobj->uo_refs);
}
#endif

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
{

	return 0;
}

int
uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
	vm_prot_t prot, bool set_max)
{

	return EOPNOTSUPP;
}

/*
 * UVM km
 */

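/*
 * The kernel memory allocators below go straight to the host:
 * uvm_km_alloc() hands out anonymous memory from rumpuser_anonmmap()
 * and uvm_km_free() unmaps it again. Alignment is passed to the
 * hypercall as log2 of the requested alignment.
 */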
vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv;
	int alignbit, error;

	alignbit = 0;
	if (align) {
		alignbit = ffs(align)-1;
	}

	rv = rumpuser_anonmmap(size, alignbit, flags & UVM_KMF_EXEC, &error);
	if (rv == NULL) {
		if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
			return 0;
		else
			panic("uvm_km_alloc failed");
	}

	if (flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	rumpuser_unmap((void *)vaddr, size);
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

	return (struct vm_map *)417416;
}

vaddr_t
uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
{

	return (vaddr_t)rumpuser_malloc(PAGE_SIZE, !waitok);
}

void
uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
{

	rumpuser_unmap((void *)addr, PAGE_SIZE);
}

vaddr_t
uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok)
{
	void *rv;
	int error;

	rv = rumpuser_anonmmap(PAGE_SIZE, PAGE_SHIFT, 0, &error);
	if (rv == NULL && waitok)
		panic("fixme: poolpage alloc failed");

	return (vaddr_t)rv;
}

void
uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr)
{

	rumpuser_unmap((void *)vaddr, PAGE_SIZE);
}

void
uvm_km_va_drain(struct vm_map *map, uvm_flag_t flags)
{

	/* we eventually maybe want some model for available memory */
}

/*
 * Mapping and vm space locking routines.
 * XXX: these don't work for non-local vmspaces
 */
int
uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
{

	KASSERT(vs == &rump_vmspace);
	return 0;
}

void
uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
{

	KASSERT(vs == &rump_vmspace);
}

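/*
 * Buffers already live in directly accessible memory, so mapping one
 * for i/o requires only saving and restoring b_data.
 */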
void
vmapbuf(struct buf *bp, vsize_t len)
{

	bp->b_saveaddr = bp->b_data;
}

void
vunmapbuf(struct buf *bp, vsize_t len)
{

	bp->b_data = bp->b_saveaddr;
	bp->b_saveaddr = 0;
}

void
uvm_wait(const char *msg)
{

	/* nothing to wait for */
}

void
uvmspace_free(struct vmspace *vm)
{

	/* nothing for now */
}

int
uvm_io(struct vm_map *map, struct uio *uio)
{

	/*
	 * just do direct uio for now. but this needs some vmspace
	 * olympics for rump_sysproxy.
	 */
	return uiomove((void *)(vaddr_t)uio->uio_offset, uio->uio_resid, uio);
}

/*
 * page life cycle stuff. it really doesn't exist, so just stubs.
 */

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagedeactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagedequeue(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageenqueue(struct vm_page *pg)
{

	/* nada */
}