vm.c revision 1.30.4.3 1 /* $NetBSD: vm.c,v 1.30.4.3 2009/08/19 18:48:30 yamt Exp $ */
2
3 /*
4 * Copyright (c) 2007 Antti Kantee. All Rights Reserved.
5 *
6 * Development of this software was supported by Google Summer of Code.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 /*
31 * Virtual memory emulation routines. Contents:
32 * + anon objects & pager
33 * + misc support routines
34 * + kmem
35 */
36
37 /*
38 * XXX: we abuse pg->uanon for the virtual address of the storage
39 * for each page. phys_addr would fit the job description better,
40 * except that it will create unnecessary lossage on some platforms
41 * due to not being a pointer type.
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.30.4.3 2009/08/19 18:48:30 yamt Exp $");
46
47 #include <sys/param.h>
48 #include <sys/atomic.h>
49 #include <sys/null.h>
50 #include <sys/vnode.h>
51 #include <sys/buf.h>
52 #include <sys/kmem.h>
53
54 #include <machine/pmap.h>
55
56 #include <rump/rumpuser.h>
57
58 #include <uvm/uvm.h>
59 #include <uvm/uvm_ddb.h>
60 #include <uvm/uvm_prot.h>
61 #include <uvm/uvm_readahead.h>
62
63 #include "rump_private.h"
64
65 static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
66 int *, int, vm_prot_t, int, int);
67 static int ao_put(struct uvm_object *, voff_t, voff_t, int);
68
69 const struct uvm_pagerops aobj_pager = {
70 .pgo_get = ao_get,
71 .pgo_put = ao_put,
72 };
73
74 kmutex_t uvm_pageqlock;
75
76 struct uvmexp uvmexp;
77 struct uvm uvm;
78
79 struct vmspace rump_vmspace;
80 struct vm_map rump_vmmap;
81 static struct vm_map_kernel kmem_map_store;
82 struct vm_map *kmem_map = &kmem_map_store.vmk_map;
83 const struct rb_tree_ops uvm_page_tree_ops;
84
85 static struct vm_map_kernel kernel_map_store;
86 struct vm_map *kernel_map = &kernel_map_store.vmk_map;
87
88 /*
89 * vm pages
90 */
91
92 /* called with the object locked */
93 struct vm_page *
94 rumpvm_makepage(struct uvm_object *uobj, voff_t off)
95 {
96 struct vm_page *pg;
97
98 pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
99 pg->offset = off;
100 pg->uobject = uobj;
101
102 pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
103 pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
104
105 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
106 uobj->uo_npages++;
107
108 return pg;
109 }
110
111 /* these are going away very soon */
112 void rumpvm_enterva(vaddr_t addr, struct vm_page *pg) {}
void
rumpvm_flushva(struct uvm_object *uobj)
{

	/* intentionally empty: the object is not touched */
}
114
115 struct vm_page *
116 uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
117 int flags, int strat, int free_list)
118 {
119
120 return rumpvm_makepage(uobj, off);
121 }
122
123 /*
124 * Release a page.
125 *
126 * Called with the vm object locked.
127 */
128 void
129 uvm_pagefree(struct vm_page *pg)
130 {
131 struct uvm_object *uobj = pg->uobject;
132
133 if (pg->flags & PG_WANTED)
134 wakeup(pg);
135
136 uobj->uo_npages--;
137 TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
138 kmem_free((void *)pg->uanon, PAGE_SIZE);
139 kmem_free(pg, sizeof(*pg));
140 }
141
142 void
143 uvm_pagezero(struct vm_page *pg)
144 {
145
146 pg->flags &= ~PG_CLEAN;
147 memset((void *)pg->uanon, 0, PAGE_SIZE);
148 }
149
150 /*
151 * Anon object stuff
152 */
153
154 static int
155 ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
156 int *npages, int centeridx, vm_prot_t access_type,
157 int advice, int flags)
158 {
159 struct vm_page *pg;
160 int i;
161
162 if (centeridx)
163 panic("%s: centeridx != 0 not supported", __func__);
164
165 /* loop over pages */
166 off = trunc_page(off);
167 for (i = 0; i < *npages; i++) {
168 retrylookup:
169 pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
170 if (pg) {
171 if (pg->flags & PG_BUSY) {
172 pg->flags |= PG_WANTED;
173 UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
174 "aogetpg", 0);
175 goto retrylookup;
176 }
177 pg->flags |= PG_BUSY;
178 pgs[i] = pg;
179 } else {
180 pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
181 pgs[i] = pg;
182 }
183 }
184 mutex_exit(&uobj->vmobjlock);
185
186 return 0;
187
188 }
189
190 static int
191 ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
192 {
193 struct vm_page *pg;
194
195 /* we only free all pages for now */
196 if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
197 mutex_exit(&uobj->vmobjlock);
198 return 0;
199 }
200
201 while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
202 uvm_pagefree(pg);
203 mutex_exit(&uobj->vmobjlock);
204
205 return 0;
206 }
207
208 struct uvm_object *
209 uao_create(vsize_t size, int flags)
210 {
211 struct uvm_object *uobj;
212
213 uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
214 uobj->pgops = &aobj_pager;
215 TAILQ_INIT(&uobj->memq);
216 mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);
217
218 return uobj;
219 }
220
221 void
222 uao_detach(struct uvm_object *uobj)
223 {
224
225 mutex_enter(&uobj->vmobjlock);
226 ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
227 mutex_destroy(&uobj->vmobjlock);
228 kmem_free(uobj, sizeof(*uobj));
229 }
230
231 /*
232 * Misc routines
233 */
234
/* held around every access to pagerlist in this file */
static kmutex_t pagermtx;
236
237 void
238 rumpvm_init(void)
239 {
240
241 uvmexp.free = 1024*1024; /* XXX */
242 uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
243 rump_vmspace.vm_map.pmap = pmap_kernel();
244
245 mutex_init(&pagermtx, MUTEX_DEFAULT, 0);
246 mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);
247
248 kernel_map->pmap = pmap_kernel();
249 callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
250 kmem_map->pmap = pmap_kernel();
251 callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM);
252 }
253
254
255
/* Wiring is a no-op here: the page is left untouched. */
void
uvm_pagewire(struct vm_page *pg)
{

	(void)pg;
}
262
/* Unwiring is a no-op here: the page is left untouched. */
void
uvm_pageunwire(struct vm_page *pg)
{

	(void)pg;
}
269
270 int
271 uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
272 vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim)
273 {
274
275 panic("%s: unimplemented", __func__);
276 }
277
/*
 * Bookkeeping for one pager window handed out by uvm_pagermapin().
 */
struct pagerinfo {
	vaddr_t pgr_kva;		/* start of the window's buffer */
	int pgr_npages;			/* number of pages in the window */
	struct vm_page **pgr_pgs;	/* the pages the window was built from */
	bool pgr_read;			/* UVMPAGER_MAPIN_READ was requested */

	LIST_ENTRY(pagerinfo) pgr_entries;	/* linkage on pagerlist */
};
/* all currently mapped windows; accessed with pagermtx held */
static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);
287
288 /*
289 * Pager "map" in routine. Instead of mapping, we allocate memory
290 * and copy page contents there. Not optimal or even strictly
291 * correct (the caller might modify the page contents after mapping
292 * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK.
293 */
294 vaddr_t
295 uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
296 {
297 struct pagerinfo *pgri;
298 vaddr_t curkva;
299 int i;
300
301 /* allocate structures */
302 pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
303 pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
304 pgri->pgr_npages = npages;
305 pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
306 pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;
307
308 /* copy contents to "mapped" memory */
309 for (i = 0, curkva = pgri->pgr_kva;
310 i < npages;
311 i++, curkva += PAGE_SIZE) {
312 /*
313 * We need to copy the previous contents of the pages to
314 * the window even if we are reading from the
315 * device, since the device might not fill the contents of
316 * the full mapped range and we will end up corrupting
317 * data when we unmap the window.
318 */
319 memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE);
320 pgri->pgr_pgs[i] = pgs[i];
321 }
322
323 mutex_enter(&pagermtx);
324 LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
325 mutex_exit(&pagermtx);
326
327 return pgri->pgr_kva;
328 }
329
330 /*
331 * map out the pager window. return contents from VA to page storage
332 * and free structures.
333 *
334 * Note: does not currently support partial frees
335 */
336 void
337 uvm_pagermapout(vaddr_t kva, int npages)
338 {
339 struct pagerinfo *pgri;
340 vaddr_t curkva;
341 int i;
342
343 mutex_enter(&pagermtx);
344 LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
345 if (pgri->pgr_kva == kva)
346 break;
347 }
348 KASSERT(pgri);
349 if (pgri->pgr_npages != npages)
350 panic("uvm_pagermapout: partial unmapping not supported");
351 LIST_REMOVE(pgri, pgr_entries);
352 mutex_exit(&pagermtx);
353
354 if (pgri->pgr_read) {
355 for (i = 0, curkva = pgri->pgr_kva;
356 i < pgri->pgr_npages;
357 i++, curkva += PAGE_SIZE) {
358 memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE);
359 }
360 }
361
362 kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
363 kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE);
364 kmem_free(pgri, sizeof(*pgri));
365 }
366
367 /*
368 * convert va in pager window to page structure.
369 * XXX: how expensive is this (global lock, list traversal)?
370 */
371 struct vm_page *
372 uvm_pageratop(vaddr_t va)
373 {
374 struct pagerinfo *pgri;
375 struct vm_page *pg = NULL;
376 int i;
377
378 mutex_enter(&pagermtx);
379 LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
380 if (pgri->pgr_kva <= va
381 && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
382 break;
383 }
384 if (pgri) {
385 i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
386 pg = pgri->pgr_pgs[i];
387 }
388 mutex_exit(&pagermtx);
389
390 return pg;
391 }
392
393 /* Called with the vm object locked */
394 struct vm_page *
395 uvm_pagelookup(struct uvm_object *uobj, voff_t off)
396 {
397 struct vm_page *pg;
398
399 TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
400 if (pg->offset == off) {
401 return pg;
402 }
403 }
404
405 return NULL;
406 }
407
408 void
409 uvm_page_unbusy(struct vm_page **pgs, int npgs)
410 {
411 struct vm_page *pg;
412 int i;
413
414 for (i = 0; i < npgs; i++) {
415 pg = pgs[i];
416 if (pg == NULL)
417 continue;
418
419 KASSERT(pg->flags & PG_BUSY);
420 if (pg->flags & PG_WANTED)
421 wakeup(pg);
422 if (pg->flags & PG_RELEASED)
423 uvm_pagefree(pg);
424 else
425 pg->flags &= ~(PG_WANTED|PG_BUSY);
426 }
427 }
428
/*
 * Report the number of active and inactive pages.  Both values are a
 * fixed guess of 1024.
 */
void
uvm_estimatepageable(int *active, int *inactive)
{
	const int guess = 1024;	/* XXX: guessing game */

	*inactive = guess;
	*active = guess;
}
437
/* Reinterpret the map pointer as a pointer to struct vm_map_kernel. */
struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{
	struct vm_map_kernel *vmk;

	vmk = (struct vm_map_kernel *)map;
	return vmk;
}
444
/* Always reports false; the map is not examined. */
bool
vm_map_starved_p(struct vm_map *map)
{
	const bool starved = false;

	return starved;
}
451
452 void
453 uvm_pageout_start(int npages)
454 {
455
456 uvmexp.paging += npages;
457 }
458
459 void
460 uvm_pageout_done(int npages)
461 {
462
463 uvmexp.paging -= npages;
464
465 /*
466 * wake up either of pagedaemon or LWPs waiting for it.
467 */
468
469 if (uvmexp.free <= uvmexp.reserve_kernel) {
470 wakeup(&uvm.pagedaemon);
471 } else {
472 wakeup(&uvmexp.free);
473 }
474 }
475
476 /* XXX: following two are unfinished because lwp's are not refcounted yet */
477 void
478 uvm_lwp_hold(struct lwp *l)
479 {
480
481 atomic_inc_uint(&l->l_holdcnt);
482 }
483
484 void
485 uvm_lwp_rele(struct lwp *l)
486 {
487
488 atomic_dec_uint(&l->l_holdcnt);
489 }
490
491 int
492 uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
493 {
494
495 panic("%s: unimplemented", __func__);
496 }
497
/* Not implemented: any call panics. */
void
uvm_unloan(void *v, int npages, int flags)
{
	const char *self = __func__;

	panic("%s: unimplemented", self);
}
504
505 int
506 uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
507 struct vm_page **opp)
508 {
509
510 panic("%s: unimplemented", __func__);
511 }
512
/* Prints nothing for now; none of the arguments are used. */
void
uvm_object_printit(struct uvm_object *uobj, bool full,
    void (*pr)(const char *, ...))
{

	(void)uobj;
	(void)full;
	(void)pr;
}
520
/* Read-ahead does nothing for now; always returns 0. */
int
uvm_readahead(struct uvm_object *uobj, off_t off, off_t size)
{

	(void)uobj;
	(void)off;
	(void)size;

	return 0;
}
528
529 /*
530 * Kmem
531 */
532
533 #ifndef RUMP_USE_REAL_ALLOCATORS
/*
 * Initialize the kmem allocator.  Nothing needs to be set up for the
 * allocator in this file.
 *
 * Declared with an explicit (void) parameter list: an empty "()" is
 * an old-style declarator which does not provide a prototype.
 */
void
kmem_init(void)
{

	/* nothing to do */
}
540
541 void *
542 kmem_alloc(size_t size, km_flag_t kmflag)
543 {
544
545 return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
546 }
547
548 void *
549 kmem_zalloc(size_t size, km_flag_t kmflag)
550 {
551 void *rv;
552
553 rv = kmem_alloc(size, kmflag);
554 if (rv)
555 memset(rv, 0, size);
556
557 return rv;
558 }
559
/* Hand p back via rumpuser_free(); the size argument is not used. */
void
kmem_free(void *p, size_t size)
{

	(void)size;
	rumpuser_free(p);
}
566 #endif /* RUMP_USE_REAL_ALLOCATORS */
567
568 /*
569 * UVM km
570 */
571
572 vaddr_t
573 uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
574 {
575 void *rv;
576 int alignbit, error;
577
578 alignbit = 0;
579 if (align) {
580 alignbit = ffs(align)-1;
581 }
582
583 rv = rumpuser_anonmmap(size, alignbit, flags & UVM_KMF_EXEC, &error);
584 if (rv == NULL) {
585 if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
586 return 0;
587 else
588 panic("uvm_km_alloc failed");
589 }
590
591 if (flags & UVM_KMF_ZERO)
592 memset(rv, 0, size);
593
594 return (vaddr_t)rv;
595 }
596
597 void
598 uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
599 {
600
601 rumpuser_unmap((void *)vaddr, size);
602 }
603
604 struct vm_map *
605 uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
606 vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
607 {
608
609 return (struct vm_map *)417416;
610 }
611
612 vaddr_t
613 uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
614 {
615
616 return (vaddr_t)rumpuser_malloc(PAGE_SIZE, !waitok);
617 }
618
619 void
620 uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
621 {
622
623 rumpuser_unmap((void *)addr, PAGE_SIZE);
624 }
625
626 vaddr_t
627 uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok)
628 {
629 void *rv;
630 int error;
631
632 rv = rumpuser_anonmmap(PAGE_SIZE, PAGE_SHIFT, 0, &error);
633 if (rv == NULL && waitok)
634 panic("fixme: poolpage alloc failed");
635
636 return (vaddr_t)rv;
637 }
638
639 void
640 uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr)
641 {
642
643 rumpuser_unmap((void *)vaddr, PAGE_SIZE);
644 }
645
646 /*
647 * Mapping and vm space locking routines.
648 * XXX: these don't work for non-local vmspaces
649 */
650 int
651 uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
652 {
653
654 KASSERT(vs == &rump_vmspace);
655 return 0;
656 }
657
658 void
659 uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
660 {
661
662 KASSERT(vs == &rump_vmspace);
663 }
664
665 void
666 vmapbuf(struct buf *bp, vsize_t len)
667 {
668
669 bp->b_saveaddr = bp->b_data;
670 }
671
672 void
673 vunmapbuf(struct buf *bp, vsize_t len)
674 {
675
676 bp->b_data = bp->b_saveaddr;
677 bp->b_saveaddr = 0;
678 }
679
/* Returns immediately: there is nothing to wait for.  msg is unused. */
void
uvm_wait(const char *msg)
{

	(void)msg;
}
686
687 /*
688 * page life cycle stuff. it really doesn't exist, so just stubs.
689 */
690
/* Stub: the page is left untouched. */
void
uvm_pageactivate(struct vm_page *pg)
{

	(void)pg;
}
697
/* Stub: the page is left untouched. */
void
uvm_pagedeactivate(struct vm_page *pg)
{

	(void)pg;
}
704
/* Stub: the page is left untouched. */
void
uvm_pagedequeue(struct vm_page *pg)
{

	(void)pg;
}
711
/* Stub: the page is left untouched. */
void
uvm_pageenqueue(struct vm_page *pg)
{

	(void)pg;
}
718