/*	$NetBSD: vm.c,v 1.69 2009/12/04 17:15:47 pooka Exp $	*/

/*
 * Copyright (c) 2007 Antti Kantee. All Rights Reserved.
 *
 * Development of this software was supported by Google Summer of Code.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines. Contents:
 *  + anon objects & pager
 *  + misc support routines
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page. phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.69 2009/12/04 17:15:47 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/buf.h>

#include <machine/pmap.h>

#include <rump/rumpuser.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include "rump_private.h"

static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

const struct uvm_pagerops aobj_pager = {
	.pgo_get = ao_get,
	.pgo_put = ao_put,
};

kmutex_t uvm_pageqlock;

struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;
static struct vm_map_kernel kmem_map_store;
struct vm_map *kmem_map = &kmem_map_store.vmk_map;
const struct rb_tree_ops uvm_page_tree_ops;

static struct vm_map_kernel kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store.vmk_map;

/*
 * vm pages
 */

/* called with the object locked */
struct vm_page *
rumpvm_makepage(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
	pg->offset = off;
	pg->uobject = uobj;

	pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
	uobj->uo_npages++;

	return pg;
}

/* these are going away very soon */
void rumpvm_enterva(vaddr_t addr, struct vm_page *pg) {}
void rumpvm_flushva(struct uvm_object *uobj) {}

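/*
 * Page allocation. The strategy and free list arguments are ignored:
 * every page is simply backed by host memory via rumpvm_makepage().
 */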
struct vm_page *
uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
	int flags, int strat, int free_list)
{

	return rumpvm_makepage(uobj, off);
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	uobj->uo_npages--;
	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
	kmem_free((void *)pg->uanon, PAGE_SIZE);
	kmem_free(pg, sizeof(*pg));
}

void
uvm_pagezero(struct vm_page *pg)
{

	pg->flags &= ~PG_CLEAN;
	memset((void *)pg->uanon, 0, PAGE_SIZE);
}

/*
 * Anon object stuff
 */

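/*
 * Anon object pager "get": look up each requested page, sleeping and
 * retrying if it is busy, and create missing pages on the fly.  Pages
 * are returned busy.  Called with the object locked; the lock is
 * dropped before returning.
 */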
static int
ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
	int *npages, int centeridx, vm_prot_t access_type,
	int advice, int flags)
{
	struct vm_page *pg;
	int i;

	if (centeridx)
		panic("%s: centeridx != 0 not supported", __func__);

	/* loop over pages */
	off = trunc_page(off);
	for (i = 0; i < *npages; i++) {
 retrylookup:
		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "aogetpg", 0);
				goto retrylookup;
			}
			pg->flags |= PG_BUSY;
			pgs[i] = pg;
		} else {
			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
			pgs[i] = pg;
		}
	}
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

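/*
 * Anon object pager "put": only the full "free all pages" case
 * (PGO_FREE | PGO_ALLPAGES) is handled; anything else just drops the
 * object lock and reports success.
 */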
static int
ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	/* we only free all pages for now */
	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
		uvm_pagefree(pg);
	mutex_exit(&uobj->vmobjlock);

	return 0;
}

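/*
 * Create an anonymous UVM object.  The size is not recorded, since
 * pages are created lazily by ao_get() as they are first accessed.
 */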
struct uvm_object *
uao_create(vsize_t size, int flags)
{
	struct uvm_object *uobj;

	uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
	uobj->pgops = &aobj_pager;
	TAILQ_INIT(&uobj->memq);
	mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);

	return uobj;
}

void
uao_detach(struct uvm_object *uobj)
{

	mutex_enter(&uobj->vmobjlock);
	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
	mutex_destroy(&uobj->vmobjlock);
	kmem_free(uobj, sizeof(*uobj));
}
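
/*
 * Illustrative lifecycle of an anon object with the pager above
 * (a sketch, not taken verbatim from any caller):
 *
 *	struct uvm_object *uobj = uao_create(PAGE_SIZE, 0);
 *	...use uobj as backing store; pages appear via ao_get()...
 *	uao_detach(uobj);	frees all remaining pages and the object
 */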

/*
 * Misc routines
 */

static kmutex_t pagermtx;

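/*
 * Bootstrap the emulated VM: pretend there is plenty of free memory,
 * hook the kernel and kmem maps up to the kernel pmap, and set up the
 * pager bookkeeping lock.
 */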
void
rumpvm_init(void)
{

	uvmexp.free = 1024*1024; /* XXX */
	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
	rump_vmspace.vm_map.pmap = pmap_kernel();

	mutex_init(&pagermtx, MUTEX_DEFAULT, 0);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);

	kernel_map->pmap = pmap_kernel();
	callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
	kmem_map->pmap = pmap_kernel();
	callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM);
}


void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

/*
 * This satisfies the "disgusting mmap hack" used by proplib.
 * We probably should grow some more assertables to make sure we're
 * not satisfying anything we shouldn't be satisfying. At least we
 * should make sure it's the local machine we're mmapping ...
 */
int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim)
{
	void *uaddr;
	int error;

	if (prot != (VM_PROT_READ | VM_PROT_WRITE))
		panic("uvm_mmap() variant unsupported");
	if (flags != (MAP_PRIVATE | MAP_ANON))
		panic("uvm_mmap() variant unsupported");
	/* no reason in particular, but cf. uvm_default_mapaddr() */
	if (*addr != 0)
		panic("uvm_mmap() variant unsupported");

	uaddr = rumpuser_anonmmap(size, 0, 0, &error);
	if (uaddr == NULL)
		return error;

	*addr = (vaddr_t)uaddr;
	return 0;
}

struct pagerinfo {
	vaddr_t pgr_kva;
	int pgr_npages;
	struct vm_page **pgr_pgs;
	bool pgr_read;

	LIST_ENTRY(pagerinfo) pgr_entries;
};
static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);

/*
 * Pager "map" in routine. Instead of mapping, we allocate memory
 * and copy page contents there. Not optimal or even strictly
 * correct (the caller might modify the page contents after mapping
 * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK.
 */
vaddr_t
uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
{
	struct pagerinfo *pgri;
	vaddr_t curkva;
	int i;

	/* allocate structures */
	pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
	pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
	pgri->pgr_npages = npages;
	pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
	pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;

	/* copy contents to "mapped" memory */
	for (i = 0, curkva = pgri->pgr_kva;
	    i < npages;
	    i++, curkva += PAGE_SIZE) {
		/*
		 * We need to copy the previous contents of the pages to
		 * the window even if we are reading from the
		 * device, since the device might not fill the contents of
		 * the full mapped range and we will end up corrupting
		 * data when we unmap the window.
		 */
		memcpy((void *)curkva, pgs[i]->uanon, PAGE_SIZE);
		pgri->pgr_pgs[i] = pgs[i];
	}

	mutex_enter(&pagermtx);
	LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
	mutex_exit(&pagermtx);

	return pgri->pgr_kva;
}

/*
 * map out the pager window. return contents from VA to page storage
 * and free structures.
 *
 * Note: does not currently support partial frees
 */
void
uvm_pagermapout(vaddr_t kva, int npages)
{
	struct pagerinfo *pgri;
	vaddr_t curkva;
	int i;

	mutex_enter(&pagermtx);
	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
		if (pgri->pgr_kva == kva)
			break;
	}
	KASSERT(pgri);
	if (pgri->pgr_npages != npages)
		panic("uvm_pagermapout: partial unmapping not supported");
	LIST_REMOVE(pgri, pgr_entries);
	mutex_exit(&pagermtx);

	if (pgri->pgr_read) {
		for (i = 0, curkva = pgri->pgr_kva;
		    i < pgri->pgr_npages;
		    i++, curkva += PAGE_SIZE) {
			memcpy(pgri->pgr_pgs[i]->uanon,
			    (void *)curkva, PAGE_SIZE);
		}
	}

	kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
	kmem_free((void *)pgri->pgr_kva, npages * PAGE_SIZE);
	kmem_free(pgri, sizeof(*pgri));
}
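
/*
 * Illustrative use of the pager window (a sketch; the flags are the
 * standard UVMPAGER_MAPIN_* values referenced above):
 *
 *	kva = uvm_pagermapin(pgs, npages,
 *	    UVMPAGER_MAPIN_WAITOK | UVMPAGER_MAPIN_READ);
 *	...device transfers data into the window at kva...
 *	uvm_pagermapout(kva, npages);	copies the window back to pgs
 */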

/*
 * convert va in pager window to page structure.
 * XXX: how expensive is this (global lock, list traversal)?
 */
struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct pagerinfo *pgri;
	struct vm_page *pg = NULL;
	int i;

	mutex_enter(&pagermtx);
	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
		if (pgri->pgr_kva <= va
		    && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
			break;
	}
	if (pgri) {
		i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
		pg = pgri->pgr_pgs[i];
	}
	mutex_exit(&pagermtx);

	return pg;
}

/* Called with the vm object locked */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
		if (pg->offset == off) {
			return pg;
		}
	}

	return NULL;
}

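/*
 * Clear the busy state of a set of pages: wake up anyone waiting on
 * them and free the ones that were released while busy.
 */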
void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		if (pg->flags & PG_RELEASED)
			uvm_pagefree(pg);
		else
			pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{

	return (struct vm_map_kernel *)map;
}

bool
vm_map_starved_p(struct vm_map *map)
{

	return false;
}

void
uvm_pageout_start(int npages)
{

	uvmexp.paging += npages;
}

void
uvm_pageout_done(int npages)
{

	uvmexp.paging -= npages;

	/*
	 * wake up either of pagedaemon or LWPs waiting for it.
	 */

	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
	}
}

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_unloan(void *v, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
	struct vm_page **opp)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_object_printit(struct uvm_object *uobj, bool full,
	void (*pr)(const char *, ...))
{

	/* nada for now */
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
{

	return 0;
}

/*
 * UVM km
 */

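/*
 * Kernel memory is allocated directly from the host with
 * rumpuser_anonmmap(); the map argument is ignored.
 */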
vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv;
	int alignbit, error;

	alignbit = 0;
	if (align) {
		alignbit = ffs(align)-1;
	}

	rv = rumpuser_anonmmap(size, alignbit, flags & UVM_KMF_EXEC, &error);
	if (rv == NULL) {
		if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
			return 0;
		else
			panic("uvm_km_alloc failed");
	}

	if (flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	rumpuser_unmap((void *)vaddr, size);
}

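/*
 * Submaps are not implemented; hand back a dummy non-NULL cookie so
 * that callers which merely check for success keep working.
 */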
struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
{

	return (struct vm_map *)417416;
}

vaddr_t
uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
{

	return (vaddr_t)rumpuser_malloc(PAGE_SIZE, !waitok);
}

void
uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
{

	rumpuser_unmap((void *)addr, PAGE_SIZE);
}

vaddr_t
uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok)
{
	void *rv;
	int error;

	rv = rumpuser_anonmmap(PAGE_SIZE, PAGE_SHIFT, 0, &error);
	if (rv == NULL && waitok)
		panic("fixme: poolpage alloc failed");

	return (vaddr_t)rv;
}

void
uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr)
{

	rumpuser_unmap((void *)vaddr, PAGE_SIZE);
}

/*
 * Mapping and vm space locking routines.
 * XXX: these don't work for non-local vmspaces
 */
int
uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
{

	KASSERT(vs == &rump_vmspace);
	return 0;
}

void
uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
{

	KASSERT(vs == &rump_vmspace);
}

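/*
 * Buffer data already lives at a directly usable kernel virtual
 * address in rump, so vmapbuf()/vunmapbuf() only save and restore
 * b_data around the transfer.
 */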
void
vmapbuf(struct buf *bp, vsize_t len)
{

	bp->b_saveaddr = bp->b_data;
}

void
vunmapbuf(struct buf *bp, vsize_t len)
{

	bp->b_data = bp->b_saveaddr;
	bp->b_saveaddr = 0;
}

void
uvm_wait(const char *msg)
{

	/* nothing to wait for */
}

void
uvmspace_free(struct vmspace *vm)
{

	/* nothing for now */
}

int
uvm_io(struct vm_map *map, struct uio *uio)
{

	/*
	 * just do direct uio for now. but this needs some vmspace
	 * olympics for rump_sysproxy.
	 */
	return uiomove((void *)(vaddr_t)uio->uio_offset, uio->uio_resid, uio);
}

/*
 * page life cycle stuff. it really doesn't exist, so just stubs.
 */

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagedeactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagedequeue(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageenqueue(struct vm_page *pg)
{

	/* nada */
}