/*	$NetBSD: uvm_pgflcache.c,v 1.6 2020/10/18 18:31:31 chs Exp $	*/

/*-
 * Copyright (c) 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pgflcache.c: page freelist cache.
 *
 * This implements a tiny per-CPU cache of pages that sits between the main
 * page allocator and the freelists.  By allocating and freeing pages in
 * batch, it reduces freelist contention by an order of magnitude.
 *
 * The cache can be paused & resumed at runtime so that UVM_HOTPLUG,
 * uvm_pglistalloc() and uvm_page_redim() can have a consistent view of the
 * world.  On systems with one CPU per physical package (e.g. a
 * uniprocessor) the cache is not enabled.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pgflcache.c,v 1.6 2020/10/18 18:31:31 chs Exp $");

#include "opt_uvm.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/xcall.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pglist.h>
#include <uvm/uvm_pgflcache.h>

/* There is no point doing any of this on a uniprocessor. */
#ifdef MULTIPROCESSOR

/*
 * MAXPGS - maximum pages per color, per bucket.
 * FILLPGS - number of pages to allocate at once, per color, per bucket.
 *
 * Why the chosen values:
 *
 * (1) In 2019, an average Intel system has 4kB pages and 8x L2 cache
 * colors.  We assume that most of the time allocation activity will be
 * centered around one UVM freelist, so most of the time there will be no
 * more than 224kB worth of cached pages per-CPU.  That's tiny, but enough
 * to hugely reduce contention on the freelist locks, and give us a small
 * pool of pages which, if we're very lucky, may have some L1/L2 cache
 * locality, and do so without subtracting too much from the L2/L3 cache
 * benefits of having per-package free lists in the page allocator.
 *
 * (2) With the chosen values on _LP64, the data structure for each color
 * takes up a single cache line (64 bytes), giving this very low overhead
 * even in the "miss" case.
 *
 * (3) We don't want to cause too much pressure by hiding away memory that
 * could otherwise be put to good use.
 */
#define	MAXPGS		7
#define	FILLPGS		6
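/*
 * For reference, the arithmetic behind the figures above (a rough sketch,
 * not a guarantee): with MAXPGS == 7, 8 colors and 4kB pages, one freelist
 * caches at most 7 * 8 * 4kB = 224kB per CPU.  On _LP64, each struct
 * pccolor below is sizeof(intptr_t) + MAXPGS * sizeof(struct vm_page *) =
 * 8 + 7 * 8 = 64 bytes, i.e. exactly one cache line.
 */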

/* Variable size, according to # colors. */
struct pgflcache {
	struct pccolor {
		intptr_t	count;
		struct vm_page	*pages[MAXPGS];
	} color[1];
};
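/*
 * Note (descriptive only): the trailing color[1] is the usual old-style
 * variable length array idiom; uvm_pgflcache_init_cpu() and
 * uvm_pgflcache_resume() size each instance with
 * offsetof(struct pgflcache, color[uvmexp.ncolors]).
 */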

static kmutex_t		uvm_pgflcache_lock;
static int		uvm_pgflcache_sem;

/*
 * uvm_pgflcache_fill: fill specified freelist/color from global list
 *
 * => must be called at IPL_VM
 * => must be called with given bucket lock held
 * => must only fill from the correct bucket for this CPU
 */

void
uvm_pgflcache_fill(struct uvm_cpu *ucpu, int fl, int b, int c)
{
	struct pgflbucket *pgb;
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct pgflist *head;
	struct vm_page *pg;
	int count;

	KASSERT(mutex_owned(&uvm_freelist_locks[b].lock));
	KASSERT(ucpu->pgflbucket == b);

	/* If caching is off, then bail out. */
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return;
	}

	/* Fill only to the limit. */
	pcc = &pc->color[c];
	pgb = uvm.page_free[fl].pgfl_buckets[b];
	head = &pgb->pgb_colors[c];
	if (pcc->count >= FILLPGS) {
		return;
	}

	/* Pull pages from the bucket until it's empty, or we are full. */
	count = pcc->count;
	pg = LIST_FIRST(head);
	while (__predict_true(pg != NULL && count < FILLPGS)) {
		KASSERT(pg->flags & PG_FREE);
		KASSERT(uvm_page_get_bucket(pg) == b);
		pcc->pages[count++] = pg;
		pg = LIST_NEXT(pg, pageq.list);
	}

	/*
	 * Violate LIST abstraction to remove all pages at once: point the
	 * head at the first page we did not take and fix up its back
	 * pointer, rather than unlinking each page individually.
	 */
	head->lh_first = pg;
	if (__predict_true(pg != NULL)) {
		pg->pageq.list.le_prev = &head->lh_first;
	}
	pgb->pgb_nfree -= (count - pcc->count);
	CPU_COUNT(CPU_COUNT_FREEPAGES, -(count - pcc->count));
	pcc->count = count;
}

/*
 * uvm_pgflcache_spill: spill specified freelist/color to global list
 *
 * => must be called at IPL_VM
 * => mark __noinline so we don't pull it into uvm_pgflcache_free()
 */

static void __noinline
uvm_pgflcache_spill(struct uvm_cpu *ucpu, int fl, int c)
{
	struct pgflbucket *pgb;
	struct pgfreelist *pgfl;
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct pgflist *head;
	kmutex_t *lock;
	int b, adj;

	pc = ucpu->pgflcache[fl];
	pcc = &pc->color[c];
	pgfl = &uvm.page_free[fl];
	b = ucpu->pgflbucket;
	pgb = pgfl->pgfl_buckets[b];
	head = &pgb->pgb_colors[c];
	lock = &uvm_freelist_locks[b].lock;

	mutex_spin_enter(lock);
	for (adj = pcc->count; pcc->count != 0;) {
		pcc->count--;
		KASSERT(pcc->pages[pcc->count] != NULL);
		KASSERT(pcc->pages[pcc->count]->flags & PG_FREE);
		LIST_INSERT_HEAD(head, pcc->pages[pcc->count], pageq.list);
	}
	pgb->pgb_nfree += adj;
	CPU_COUNT(CPU_COUNT_FREEPAGES, adj);
	mutex_spin_exit(lock);
}

/*
 * uvm_pgflcache_alloc: try to allocate a cached page.
 *
 * => must be called at IPL_VM
 * => allocate only from the given freelist and given page color
 */

struct vm_page *
uvm_pgflcache_alloc(struct uvm_cpu *ucpu, int fl, int c)
{
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct vm_page *pg;

	/* If caching is off, then bail out. */
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return NULL;
	}

	/* Very simple: if we have a page then return it. */
	pcc = &pc->color[c];
	if (__predict_false(pcc->count == 0)) {
		return NULL;
	}
	pg = pcc->pages[--(pcc->count)];
	KASSERT(pg != NULL);
	KASSERT(pg->flags == PG_FREE);
	KASSERT(uvm_page_get_freelist(pg) == fl);
	KASSERT(uvm_page_get_bucket(pg) == ucpu->pgflbucket);
	pg->flags = PG_BUSY | PG_CLEAN | PG_FAKE;
	return pg;
}
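/*
 * A sketch of the expected calling pattern, inferred from the contracts
 * above (the authoritative callers live in uvm_page.c and may differ in
 * detail): at IPL_VM the allocator first tries uvm_pgflcache_alloc(); if
 * that returns NULL, it takes the bucket lock, allocates from the global
 * freelist, and may call uvm_pgflcache_fill() to restock the per-CPU
 * cache for next time.
 */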

/*
 * uvm_pgflcache_free: cache a page, if possible.
 *
 * => must be called at IPL_VM
 * => must only send pages for the correct bucket for this CPU
 */

bool
uvm_pgflcache_free(struct uvm_cpu *ucpu, struct vm_page *pg)
{
	struct pgflcache *pc;
	struct pccolor *pcc;
	int fl, c;

	KASSERT(uvm_page_get_bucket(pg) == ucpu->pgflbucket);

	/* If caching is off, then bail out. */
	fl = uvm_page_get_freelist(pg);
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return false;
	}

	/* If the array is full spill it first, then add page to array. */
	c = VM_PGCOLOR(pg);
	pcc = &pc->color[c];
	KASSERT((pg->flags & PG_FREE) == 0);
	if (__predict_false(pcc->count == MAXPGS)) {
		uvm_pgflcache_spill(ucpu, fl, c);
	}
	pg->flags = PG_FREE;
	pcc->pages[pcc->count] = pg;
	pcc->count++;
	return true;
}

/*
 * uvm_pgflcache_init_cpu: allocate and initialize per-CPU data structures
 * for the free page cache.  Don't set anything in motion - that's taken
 * care of by uvm_pgflcache_resume().
 */

static void
uvm_pgflcache_init_cpu(struct cpu_info *ci)
{
	struct uvm_cpu *ucpu;
	size_t sz;

	ucpu = ci->ci_data.cpu_uvm;
	KASSERT(ucpu->pgflcachemem == NULL);
	KASSERT(ucpu->pgflcache[0] == NULL);

	/*
	 * One variable sized pgflcache per freelist.  The extra
	 * coherency_unit - 1 bytes leave room for uvm_pgflcache_resume()
	 * to align the structures on a cache line boundary.
	 */
	sz = offsetof(struct pgflcache, color[uvmexp.ncolors]);
	ucpu->pgflcachememsz =
	    (roundup2(sz * VM_NFREELIST, coherency_unit) + coherency_unit - 1);
	ucpu->pgflcachemem = kmem_zalloc(ucpu->pgflcachememsz, KM_SLEEP);
}

/*
 * uvm_pgflcache_fini_cpu: dump all cached pages back to global free list
 * and shut down caching on the CPU.  Called on each CPU in the system via
 * xcall.
 */

static void
uvm_pgflcache_fini_cpu(void *arg1 __unused, void *arg2 __unused)
{
	struct uvm_cpu *ucpu;
	int fl, color, s;

	ucpu = curcpu()->ci_data.cpu_uvm;
	for (fl = 0; fl < VM_NFREELIST; fl++) {
		s = splvm();
		for (color = 0; color < uvmexp.ncolors; color++) {
			uvm_pgflcache_spill(ucpu, fl, color);
		}
		ucpu->pgflcache[fl] = NULL;
		splx(s);
	}
}

/*
 * uvm_pgflcache_pause: pause operation of the caches
 */

void
uvm_pgflcache_pause(void)
{
	uint64_t where;

	/* First one in starts draining.  Everyone else waits. */
	mutex_enter(&uvm_pgflcache_lock);
	if (uvm_pgflcache_sem++ == 0) {
		where = xc_broadcast(XC_HIGHPRI, uvm_pgflcache_fini_cpu,
		    (void *)1, NULL);
		xc_wait(where);
	}
	mutex_exit(&uvm_pgflcache_lock);
}
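/*
 * Typical usage of the pause/resume pair (a sketch based on the comment at
 * the top of this file, not a verbatim caller; the named users are
 * UVM_HOTPLUG, uvm_pglistalloc() and uvm_page_redim()):
 *
 *	uvm_pgflcache_pause();
 *	... inspect or rebuild the global freelists ...
 *	uvm_pgflcache_resume();
 *
 * The uvm_pgflcache_sem count lets such sections overlap: the first to
 * pause drains the caches, and the last to resume re-enables them.
 */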

/*
 * uvm_pgflcache_resume: resume operation of the caches
 */

void
uvm_pgflcache_resume(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct uvm_cpu *ucpu;
	uintptr_t addr;
	size_t sz;
	int fl;

	/* Last guy out takes care of business. */
	mutex_enter(&uvm_pgflcache_lock);
	KASSERT(uvm_pgflcache_sem > 0);
	if (uvm_pgflcache_sem-- > 1) {
		mutex_exit(&uvm_pgflcache_lock);
		return;
	}

	/*
	 * Make sure dependent data structure updates are remotely visible.
	 * Essentially this functions as a global memory barrier.
	 */
	xc_barrier(XC_HIGHPRI);

	/*
	 * Then set all of the pointers in place on each CPU.  As soon as
	 * each pointer is set, caching is operational in that dimension.
	 */
	sz = offsetof(struct pgflcache, color[uvmexp.ncolors]);
	for (CPU_INFO_FOREACH(cii, ci)) {
		ucpu = ci->ci_data.cpu_uvm;
		addr = roundup2((uintptr_t)ucpu->pgflcachemem, coherency_unit);
		for (fl = 0; fl < VM_NFREELIST; fl++) {
			ucpu->pgflcache[fl] = (struct pgflcache *)addr;
			addr += sz;
		}
	}
	mutex_exit(&uvm_pgflcache_lock);
}

/*
 * uvm_pgflcache_start: start operation of the cache.
 *
 * => called once only, when init(8) is about to be started
 */

void
uvm_pgflcache_start(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(uvm_pgflcache_sem > 0);

	/*
	 * There's not much point doing this if every CPU has its own
	 * bucket (and that includes the uniprocessor case).
	 */
	if (ncpu == uvm.bucketcount) {
		return;
	}

	/* Create data structures for each CPU. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		uvm_pgflcache_init_cpu(ci);
	}

	/* Kick it into action. */
	uvm_pgflcache_resume();
}

/*
 * uvm_pgflcache_init: set up data structures for the free page cache.
 */

void
uvm_pgflcache_init(void)
{

	uvm_pgflcache_sem = 1;
	mutex_init(&uvm_pgflcache_lock, MUTEX_DEFAULT, IPL_NONE);
}

#else	/* MULTIPROCESSOR */

struct vm_page *
uvm_pgflcache_alloc(struct uvm_cpu *ucpu, int fl, int c)
{

	return NULL;
}

bool
uvm_pgflcache_free(struct uvm_cpu *ucpu, struct vm_page *pg)
{

	return false;
}

void
uvm_pgflcache_fill(struct uvm_cpu *ucpu, int fl, int b, int c)
{

}

void
uvm_pgflcache_pause(void)
{

}

void
uvm_pgflcache_resume(void)
{

}

void
uvm_pgflcache_start(void)
{

}

void
uvm_pgflcache_init(void)
{

}

#endif	/* MULTIPROCESSOR */