vm.c revision 1.63 1 /* $NetBSD: vm.c,v 1.63 2009/10/19 22:31:47 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2007 Antti Kantee. All Rights Reserved.
5 *
6 * Development of this software was supported by Google Summer of Code.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 /*
31 * Virtual memory emulation routines. Contents:
32 * + anon objects & pager
33 * + misc support routines
34 * + kmem
35 */
36
37 /*
38 * XXX: we abuse pg->uanon for the virtual address of the storage
39 * for each page. phys_addr would fit the job description better,
40 * except that it will create unnecessary lossage on some platforms
41 * due to not being a pointer type.
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.63 2009/10/19 22:31:47 pooka Exp $");
46
47 #include <sys/param.h>
48 #include <sys/atomic.h>
49 #include <sys/null.h>
50 #include <sys/vnode.h>
51 #include <sys/buf.h>
52 #include <sys/kmem.h>
53
54 #include <machine/pmap.h>
55
56 #include <rump/rumpuser.h>
57
58 #include <uvm/uvm.h>
59 #include <uvm/uvm_ddb.h>
60 #include <uvm/uvm_prot.h>
61 #include <uvm/uvm_readahead.h>
62
63 #include "rump_private.h"
64
65 static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
66 int *, int, vm_prot_t, int, int);
67 static int ao_put(struct uvm_object *, voff_t, voff_t, int);
68
69 const struct uvm_pagerops aobj_pager = {
70 .pgo_get = ao_get,
71 .pgo_put = ao_put,
72 };
73
74 kmutex_t uvm_pageqlock;
75
76 struct uvmexp uvmexp;
77 struct uvm uvm;
78
79 struct vmspace rump_vmspace;
80 struct vm_map rump_vmmap;
81 static struct vm_map_kernel kmem_map_store;
82 struct vm_map *kmem_map = &kmem_map_store.vmk_map;
83 const struct rb_tree_ops uvm_page_tree_ops;
84
85 static struct vm_map_kernel kernel_map_store;
86 struct vm_map *kernel_map = &kernel_map_store.vmk_map;
87
88 /*
89 * vm pages
90 */
91
92 /* called with the object locked */
93 struct vm_page *
94 rumpvm_makepage(struct uvm_object *uobj, voff_t off)
95 {
96 struct vm_page *pg;
97
98 pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
99 pg->offset = off;
100 pg->uobject = uobj;
101
102 pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
103 pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
104
105 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
106 uobj->uo_npages++;
107
108 return pg;
109 }
110
111 /* these are going away very soon */
112 void rumpvm_enterva(vaddr_t addr, struct vm_page *pg) {}
void
rumpvm_flushva(struct uvm_object *uobj)
{

	/* empty stub: the object is not touched */
}
114
115 struct vm_page *
116 uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
117 int flags, int strat, int free_list)
118 {
119
120 return rumpvm_makepage(uobj, off);
121 }
122
123 /*
124 * Release a page.
125 *
126 * Called with the vm object locked.
127 */
128 void
129 uvm_pagefree(struct vm_page *pg)
130 {
131 struct uvm_object *uobj = pg->uobject;
132
133 if (pg->flags & PG_WANTED)
134 wakeup(pg);
135
136 uobj->uo_npages--;
137 TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
138 kmem_free((void *)pg->uanon, PAGE_SIZE);
139 kmem_free(pg, sizeof(*pg));
140 }
141
142 void
143 uvm_pagezero(struct vm_page *pg)
144 {
145
146 pg->flags &= ~PG_CLEAN;
147 memset((void *)pg->uanon, 0, PAGE_SIZE);
148 }
149
150 /*
151 * Anon object stuff
152 */
153
154 static int
155 ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
156 int *npages, int centeridx, vm_prot_t access_type,
157 int advice, int flags)
158 {
159 struct vm_page *pg;
160 int i;
161
162 if (centeridx)
163 panic("%s: centeridx != 0 not supported", __func__);
164
165 /* loop over pages */
166 off = trunc_page(off);
167 for (i = 0; i < *npages; i++) {
168 retrylookup:
169 pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
170 if (pg) {
171 if (pg->flags & PG_BUSY) {
172 pg->flags |= PG_WANTED;
173 UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
174 "aogetpg", 0);
175 goto retrylookup;
176 }
177 pg->flags |= PG_BUSY;
178 pgs[i] = pg;
179 } else {
180 pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
181 pgs[i] = pg;
182 }
183 }
184 mutex_exit(&uobj->vmobjlock);
185
186 return 0;
187
188 }
189
190 static int
191 ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
192 {
193 struct vm_page *pg;
194
195 /* we only free all pages for now */
196 if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
197 mutex_exit(&uobj->vmobjlock);
198 return 0;
199 }
200
201 while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
202 uvm_pagefree(pg);
203 mutex_exit(&uobj->vmobjlock);
204
205 return 0;
206 }
207
208 struct uvm_object *
209 uao_create(vsize_t size, int flags)
210 {
211 struct uvm_object *uobj;
212
213 uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
214 uobj->pgops = &aobj_pager;
215 TAILQ_INIT(&uobj->memq);
216 mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);
217
218 return uobj;
219 }
220
221 void
222 uao_detach(struct uvm_object *uobj)
223 {
224
225 mutex_enter(&uobj->vmobjlock);
226 ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
227 mutex_destroy(&uobj->vmobjlock);
228 kmem_free(uobj, sizeof(*uobj));
229 }
230
231 /*
232 * Misc routines
233 */
234
235 static kmutex_t pagermtx;
236
237 void
238 rumpvm_init(void)
239 {
240
241 uvmexp.free = 1024*1024; /* XXX */
242 uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
243 rump_vmspace.vm_map.pmap = pmap_kernel();
244
245 mutex_init(&pagermtx, MUTEX_DEFAULT, 0);
246 mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);
247
248 kernel_map->pmap = pmap_kernel();
249 callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
250 kmem_map->pmap = pmap_kernel();
251 callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM);
252 }
253
254
255
/* Wiring is not emulated; this is an empty stub. */
void
uvm_pagewire(struct vm_page *pg)
{

	/* nothing to do */
}
262
/* Unwiring is not emulated; this is an empty stub. */
void
uvm_pageunwire(struct vm_page *pg)
{

	/* nothing to do */
}
269
270 int
271 uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
272 vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim)
273 {
274
275 panic("%s: unimplemented", __func__);
276 }
277
278 struct pagerinfo {
279 vaddr_t pgr_kva;
280 int pgr_npages;
281 struct vm_page **pgr_pgs;
282 bool pgr_read;
283
284 LIST_ENTRY(pagerinfo) pgr_entries;
285 };
286 static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);
287
288 /*
289 * Pager "map" in routine. Instead of mapping, we allocate memory
290 * and copy page contents there. Not optimal or even strictly
291 * correct (the caller might modify the page contents after mapping
292 * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK.
293 */
294 vaddr_t
295 uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
296 {
297 struct pagerinfo *pgri;
298 vaddr_t curkva;
299 int i;
300
301 /* allocate structures */
302 pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
303 pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
304 pgri->pgr_npages = npages;
305 pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
306 pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;
307
308 /* copy contents to "mapped" memory */
309 for (i = 0, curkva = pgri->pgr_kva;
310 i < npages;
311 i++, curkva += PAGE_SIZE) {
312 /*
313 * We need to copy the previous contents of the pages to
314 * the window even if we are reading from the
315 * device, since the device might not fill the contents of
316 * the full mapped range and we will end up corrupting
317 * data when we unmap the window.
318 */
319 memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE);
320 pgri->pgr_pgs[i] = pgs[i];
321 }
322
323 mutex_enter(&pagermtx);
324 LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
325 mutex_exit(&pagermtx);
326
327 return pgri->pgr_kva;
328 }
329
330 /*
331 * map out the pager window. return contents from VA to page storage
332 * and free structures.
333 *
334 * Note: does not currently support partial frees
335 */
336 void
337 uvm_pagermapout(vaddr_t kva, int npages)
338 {
339 struct pagerinfo *pgri;
340 vaddr_t curkva;
341 int i;
342
343 mutex_enter(&pagermtx);
344 LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
345 if (pgri->pgr_kva == kva)
346 break;
347 }
348 KASSERT(pgri);
349 if (pgri->pgr_npages != npages)
350 panic("uvm_pagermapout: partial unmapping not supported");
351 LIST_REMOVE(pgri, pgr_entries);
352 mutex_exit(&pagermtx);
353
354 if (pgri->pgr_read) {
355 for (i = 0, curkva = pgri->pgr_kva;
356 i < pgri->pgr_npages;
357 i++, curkva += PAGE_SIZE) {
358 memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE);
359 }
360 }
361
362 kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
363 kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE);
364 kmem_free(pgri, sizeof(*pgri));
365 }
366
367 /*
368 * convert va in pager window to page structure.
369 * XXX: how expensive is this (global lock, list traversal)?
370 */
371 struct vm_page *
372 uvm_pageratop(vaddr_t va)
373 {
374 struct pagerinfo *pgri;
375 struct vm_page *pg = NULL;
376 int i;
377
378 mutex_enter(&pagermtx);
379 LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
380 if (pgri->pgr_kva <= va
381 && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
382 break;
383 }
384 if (pgri) {
385 i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
386 pg = pgri->pgr_pgs[i];
387 }
388 mutex_exit(&pagermtx);
389
390 return pg;
391 }
392
393 /* Called with the vm object locked */
394 struct vm_page *
395 uvm_pagelookup(struct uvm_object *uobj, voff_t off)
396 {
397 struct vm_page *pg;
398
399 TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
400 if (pg->offset == off) {
401 return pg;
402 }
403 }
404
405 return NULL;
406 }
407
408 void
409 uvm_page_unbusy(struct vm_page **pgs, int npgs)
410 {
411 struct vm_page *pg;
412 int i;
413
414 for (i = 0; i < npgs; i++) {
415 pg = pgs[i];
416 if (pg == NULL)
417 continue;
418
419 KASSERT(pg->flags & PG_BUSY);
420 if (pg->flags & PG_WANTED)
421 wakeup(pg);
422 if (pg->flags & PG_RELEASED)
423 uvm_pagefree(pg);
424 else
425 pg->flags &= ~(PG_WANTED|PG_BUSY);
426 }
427 }
428
/*
 * Report the number of active and inactive pages.  Nothing is
 * tracked here, so both outputs are a fixed guess.
 */
void
uvm_estimatepageable(int *active, int *inactive)
{
	const int guess = 1024;	/* XXX: guessing game */

	*active = guess;
	*inactive = guess;
}
437
/*
 * Return the given map pointer reinterpreted as a pointer to
 * struct vm_map_kernel; this is a plain cast.
 */
struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{
	struct vm_map_kernel *kmap;

	kmap = (struct vm_map_kernel *)map;
	return kmap;
}
444
/* Always returns false; the map argument is not examined. */
bool
vm_map_starved_p(struct vm_map *map)
{
	const bool starved = false;

	return starved;
}
451
452 void
453 uvm_pageout_start(int npages)
454 {
455
456 uvmexp.paging += npages;
457 }
458
459 void
460 uvm_pageout_done(int npages)
461 {
462
463 uvmexp.paging -= npages;
464
465 /*
466 * wake up either of pagedaemon or LWPs waiting for it.
467 */
468
469 if (uvmexp.free <= uvmexp.reserve_kernel) {
470 wakeup(&uvm.pagedaemon);
471 } else {
472 wakeup(&uvmexp.free);
473 }
474 }
475
476 /* XXX: following two are unfinished because lwp's are not refcounted yet */
477 void
478 uvm_lwp_hold(struct lwp *l)
479 {
480
481 atomic_inc_uint(&l->l_holdcnt);
482 }
483
484 void
485 uvm_lwp_rele(struct lwp *l)
486 {
487
488 atomic_dec_uint(&l->l_holdcnt);
489 }
490
491 int
492 uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
493 {
494
495 panic("%s: unimplemented", __func__);
496 }
497
/* Not implemented: any call panics. */
void
uvm_unloan(void *v, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}
504
505 int
506 uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
507 struct vm_page **opp)
508 {
509
510 panic("%s: unimplemented", __func__);
511 }
512
/* Object printing is not implemented yet; nothing is output. */
void
uvm_object_printit(struct uvm_object *uobj, bool full,
	void (*pr)(const char *, ...))
{

	/* empty for now */
}
520
521 /*
522 * Kmem
523 */
524
525 #ifndef RUMP_USE_REAL_ALLOCATORS
/* The allocator here needs no setup, so this is empty. */
void
kmem_init()
{

	/* no initialization required */
}
532
533 void *
534 kmem_alloc(size_t size, km_flag_t kmflag)
535 {
536
537 return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
538 }
539
540 void *
541 kmem_zalloc(size_t size, km_flag_t kmflag)
542 {
543 void *rv;
544
545 rv = kmem_alloc(size, kmflag);
546 if (rv)
547 memset(rv, 0, size);
548
549 return rv;
550 }
551
/*
 * Free memory obtained from kmem_alloc()/kmem_zalloc().  The size
 * argument is not needed by rumpuser_free() and is ignored.
 */
void
kmem_free(void *p, size_t size)
{

	rumpuser_free(p);
}
558 #endif /* RUMP_USE_REAL_ALLOCATORS */
559
560 /*
561 * UVM km
562 */
563
564 vaddr_t
565 uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
566 {
567 void *rv;
568 int alignbit, error;
569
570 alignbit = 0;
571 if (align) {
572 alignbit = ffs(align)-1;
573 }
574
575 rv = rumpuser_anonmmap(size, alignbit, flags & UVM_KMF_EXEC, &error);
576 if (rv == NULL) {
577 if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
578 return 0;
579 else
580 panic("uvm_km_alloc failed");
581 }
582
583 if (flags & UVM_KMF_ZERO)
584 memset(rv, 0, size);
585
586 return (vaddr_t)rv;
587 }
588
589 void
590 uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
591 {
592
593 rumpuser_unmap((void *)vaddr, size);
594 }
595
596 struct vm_map *
597 uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
598 vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
599 {
600
601 return (struct vm_map *)417416;
602 }
603
604 vaddr_t
605 uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
606 {
607
608 return (vaddr_t)rumpuser_malloc(PAGE_SIZE, !waitok);
609 }
610
611 void
612 uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
613 {
614
615 rumpuser_unmap((void *)addr, PAGE_SIZE);
616 }
617
618 vaddr_t
619 uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok)
620 {
621 void *rv;
622 int error;
623
624 rv = rumpuser_anonmmap(PAGE_SIZE, PAGE_SHIFT, 0, &error);
625 if (rv == NULL && waitok)
626 panic("fixme: poolpage alloc failed");
627
628 return (vaddr_t)rv;
629 }
630
631 void
632 uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr)
633 {
634
635 rumpuser_unmap((void *)vaddr, PAGE_SIZE);
636 }
637
638 /*
639 * Mapping and vm space locking routines.
640 * XXX: these don't work for non-local vmspaces
641 */
642 int
643 uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
644 {
645
646 KASSERT(vs == &rump_vmspace);
647 return 0;
648 }
649
650 void
651 uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
652 {
653
654 KASSERT(vs == &rump_vmspace);
655 }
656
657 void
658 vmapbuf(struct buf *bp, vsize_t len)
659 {
660
661 bp->b_saveaddr = bp->b_data;
662 }
663
664 void
665 vunmapbuf(struct buf *bp, vsize_t len)
666 {
667
668 bp->b_data = bp->b_saveaddr;
669 bp->b_saveaddr = 0;
670 }
671
/* There is nothing to wait for here, so this returns immediately. */
void
uvm_wait(const char *msg)
{

	/* msg is unused */
}
678
679 /*
680 * page life cycle stuff. it really doesn't exist, so just stubs.
681 */
682
/* Page queues are not emulated; empty stub. */
void
uvm_pageactivate(struct vm_page *pg)
{

	/* nothing to do */
}
689
/* Page queues are not emulated; empty stub. */
void
uvm_pagedeactivate(struct vm_page *pg)
{

	/* nothing to do */
}
696
/* Page queues are not emulated; empty stub. */
void
uvm_pagedequeue(struct vm_page *pg)
{

	/* nothing to do */
}
703
/* Page queues are not emulated; empty stub. */
void
uvm_pageenqueue(struct vm_page *pg)
{

	/* nothing to do */
}
710
/* Empty stub: nothing is done here. */
void
uvm_kick_scheduler(void)
{

	/* ouch */
}
717