aub_mem.c revision 01e04c3f
/*
 * Copyright © 2016-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "aub_mem.h"

#ifndef HAVE_MEMFD_CREATE
#include <sys/syscall.h>

static inline int
memfd_create(const char *name, unsigned int flags)
{
   return syscall(SYS_memfd_create, name, flags);
}
#endif

struct bo_map {
   struct list_head link;
   struct gen_batch_decode_bo bo;
   bool unmap_after_use;
   bool ppgtt;
};

struct ggtt_entry {
   struct rb_node node;
   uint64_t virt_addr;
   uint64_t phys_addr;
};

struct phys_mem {
   struct rb_node node;
   uint64_t fd_offset;
   uint64_t phys_addr;
   uint8_t *data;
   const uint8_t *aub_data;
};

static void
add_gtt_bo_map(struct aub_mem *mem, struct gen_batch_decode_bo bo, bool ppgtt, bool unmap_after_use)
{
   struct bo_map *m = calloc(1, sizeof(*m));

   m->ppgtt = ppgtt;
   m->bo = bo;
   m->unmap_after_use = unmap_after_use;
   list_add(&m->link, &mem->maps);
}

void
aub_mem_clear_bo_maps(struct aub_mem *mem)
{
   list_for_each_entry_safe(struct bo_map, i, &mem->maps, link) {
      if (i->unmap_after_use)
         munmap((void *)i->bo.map, i->bo.size);
      list_del(&i->link);
      free(i);
   }
}

static inline struct ggtt_entry *
ggtt_entry_next(struct ggtt_entry *entry)
{
   if (!entry)
      return NULL;
   struct rb_node *node = rb_node_next(&entry->node);
   if (!node)
      return NULL;
   return rb_node_data(struct ggtt_entry, node, node);
}

static inline int
cmp_uint64(uint64_t a, uint64_t b)
{
   if (a < b)
      return -1;
   if (a > b)
      return 1;
   return 0;
}

static inline int
cmp_ggtt_entry(const struct rb_node *node, const void *addr)
{
   struct ggtt_entry *entry = rb_node_data(struct ggtt_entry, node, node);
   return cmp_uint64(entry->virt_addr, *(const uint64_t *)addr);
}

static struct ggtt_entry *
ensure_ggtt_entry(struct aub_mem *mem, uint64_t virt_addr)
{
   struct rb_node *node = rb_tree_search_sloppy(&mem->ggtt, &virt_addr,
                                                cmp_ggtt_entry);
   int cmp = 0;
   if (!node || (cmp = cmp_ggtt_entry(node, &virt_addr))) {
      struct ggtt_entry *new_entry = calloc(1, sizeof(*new_entry));
      new_entry->virt_addr = virt_addr;
      rb_tree_insert_at(&mem->ggtt, node, &new_entry->node, cmp > 0);
      node = &new_entry->node;
   }

   return rb_node_data(struct ggtt_entry, node, node);
}
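/* Note: ensure_ggtt_entry() above and ensure_phys_mem() below share the same
 * idiom: rb_tree_search_sloppy() finds the closest existing node, and when
 * the exact page is missing a new entry is inserted next to it with
 * rb_tree_insert_at().  The search_*() variants are read-only lookups keyed
 * on the 4KiB-aligned address.  ensure_phys_mem() additionally backs each
 * newly seen physical page by growing the memfd by 4KiB and mmap'ing that
 * slice.
 */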
static struct ggtt_entry *
search_ggtt_entry(struct aub_mem *mem, uint64_t virt_addr)
{
   virt_addr &= ~0xfff;

   struct rb_node *node = rb_tree_search(&mem->ggtt, &virt_addr, cmp_ggtt_entry);

   if (!node)
      return NULL;

   return rb_node_data(struct ggtt_entry, node, node);
}

static inline int
cmp_phys_mem(const struct rb_node *node, const void *addr)
{
   struct phys_mem *mem = rb_node_data(struct phys_mem, node, node);
   return cmp_uint64(mem->phys_addr, *(uint64_t *)addr);
}

static struct phys_mem *
ensure_phys_mem(struct aub_mem *mem, uint64_t phys_addr)
{
   struct rb_node *node = rb_tree_search_sloppy(&mem->mem, &phys_addr, cmp_phys_mem);
   int cmp = 0;
   if (!node || (cmp = cmp_phys_mem(node, &phys_addr))) {
      struct phys_mem *new_mem = calloc(1, sizeof(*new_mem));
      new_mem->phys_addr = phys_addr;
      new_mem->fd_offset = mem->mem_fd_len;

      MAYBE_UNUSED int ftruncate_res = ftruncate(mem->mem_fd, mem->mem_fd_len += 4096);
      assert(ftruncate_res == 0);

      new_mem->data = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                           mem->mem_fd, new_mem->fd_offset);
      assert(new_mem->data != MAP_FAILED);

      rb_tree_insert_at(&mem->mem, node, &new_mem->node, cmp > 0);
      node = &new_mem->node;
   }

   return rb_node_data(struct phys_mem, node, node);
}

static struct phys_mem *
search_phys_mem(struct aub_mem *mem, uint64_t phys_addr)
{
   phys_addr &= ~0xfff;

   struct rb_node *node = rb_tree_search(&mem->mem, &phys_addr, cmp_phys_mem);

   if (!node)
      return NULL;

   return rb_node_data(struct phys_mem, node, node);
}

void
aub_mem_local_write(void *_mem, uint64_t address,
                    const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   struct gen_batch_decode_bo bo = {
      .map = data,
      .addr = address,
      .size = size,
   };
   add_gtt_bo_map(mem, bo, false, false);
}

void
aub_mem_ggtt_entry_write(void *_mem, uint64_t address,
                         const void *_data, uint32_t _size)
{
   struct aub_mem *mem = _mem;
   uint64_t virt_addr = (address / sizeof(uint64_t)) << 12;
   const uint64_t *data = _data;
   size_t size = _size / sizeof(*data);
   for (const uint64_t *entry = data;
        entry < data + size;
        entry++, virt_addr += 4096) {
      struct ggtt_entry *pt = ensure_ggtt_entry(mem, virt_addr);
      pt->phys_addr = *entry;
   }
}

void
aub_mem_phys_write(void *_mem, uint64_t phys_address,
                   const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   uint32_t to_write = size;
   for (uint64_t page = phys_address & ~0xfff; page < phys_address + size; page += 4096) {
      struct phys_mem *pmem = ensure_phys_mem(mem, page);
      uint64_t offset = MAX2(page, phys_address) - page;
      uint32_t size_this_page = MIN2(to_write, 4096 - offset);
      to_write -= size_this_page;
      memcpy(pmem->data + offset, data, size_this_page);
      pmem->aub_data = data - offset;
      data = (const uint8_t *)data + size_this_page;
   }
}
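/* GGTT writes go through the PTEs captured by aub_mem_ggtt_entry_write()
 * above: each 8-byte entry at GGTT offset `address` describes the 4KiB page
 * at virtual address (address / 8) << 12, with bit 0 acting as the valid
 * bit.  aub_mem_ggtt_write() below resolves every page of a write through
 * those entries and forwards the data to the backing physical page.
 */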
void
aub_mem_ggtt_write(void *_mem, uint64_t virt_address,
                   const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   uint32_t to_write = size;
   for (uint64_t page = virt_address & ~0xfff; page < virt_address + size; page += 4096) {
      struct ggtt_entry *entry = search_ggtt_entry(mem, page);
      assert(entry && entry->phys_addr & 0x1);

      uint64_t offset = MAX2(page, virt_address) - page;
      uint32_t size_this_page = MIN2(to_write, 4096 - offset);
      to_write -= size_this_page;

      uint64_t phys_page = entry->phys_addr & ~0xfff; /* Clear the validity bits. */
      aub_mem_phys_write(mem, phys_page + offset, data, size_this_page);
      data = (const uint8_t *)data + size_this_page;
   }
}

struct gen_batch_decode_bo
aub_mem_get_ggtt_bo(void *_mem, uint64_t address)
{
   struct aub_mem *mem = _mem;
   struct gen_batch_decode_bo bo = {0};

   list_for_each_entry(struct bo_map, i, &mem->maps, link)
      if (!i->ppgtt && i->bo.addr <= address && i->bo.addr + i->bo.size > address)
         return i->bo;

   address &= ~0xfff;

   struct ggtt_entry *start =
      (struct ggtt_entry *)rb_tree_search_sloppy(&mem->ggtt, &address,
                                                 cmp_ggtt_entry);
   if (start && start->virt_addr < address)
      start = ggtt_entry_next(start);
   if (!start)
      return bo;

   struct ggtt_entry *last = start;
   for (struct ggtt_entry *i = ggtt_entry_next(last);
        i && last->virt_addr + 4096 == i->virt_addr;
        last = i, i = ggtt_entry_next(last))
      ;

   bo.addr = MIN2(address, start->virt_addr);
   bo.size = last->virt_addr - bo.addr + 4096;
   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
   assert(bo.map != MAP_FAILED);

   for (struct ggtt_entry *i = start;
        i;
        i = i == last ? NULL : ggtt_entry_next(i)) {
      uint64_t phys_addr = i->phys_addr & ~0xfff;
      struct phys_mem *phys_mem = search_phys_mem(mem, phys_addr);

      if (!phys_mem)
         continue;

      uint32_t map_offset = i->virt_addr - address;
      void *res = mmap((uint8_t *)bo.map + map_offset, 4096, PROT_READ,
                       MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset);
      assert(res != MAP_FAILED);
   }

   add_gtt_bo_map(mem, bo, false, true);

   return bo;
}
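/* ppgtt_walk() below does a standard 4-level page-table walk, which
 * corresponds to the 48-bit PPGTT used on gen8 and later hardware: starting
 * at the PML4 page, each level consumes 9 bits of the virtual address (bits
 * 47:39, 38:30, 29:21, 20:12), bit 0 of an entry is the present bit, and the
 * low 12 flag bits are masked off to find the next table (or, at the last
 * level, the final 4KiB data page).
 */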
static struct phys_mem *
ppgtt_walk(struct aub_mem *mem, uint64_t pml4, uint64_t address)
{
   uint64_t shift = 39;
   uint64_t addr = pml4;
   for (int level = 4; level > 0; level--) {
      struct phys_mem *table = search_phys_mem(mem, addr);
      if (!table)
         return NULL;
      int index = (address >> shift) & 0x1ff;
      uint64_t entry = ((uint64_t *)table->data)[index];
      if (!(entry & 1))
         return NULL;
      addr = entry & ~0xfff;
      shift -= 9;
   }
   return search_phys_mem(mem, addr);
}

static bool
ppgtt_mapped(struct aub_mem *mem, uint64_t pml4, uint64_t address)
{
   return ppgtt_walk(mem, pml4, address) != NULL;
}

struct gen_batch_decode_bo
aub_mem_get_ppgtt_bo(void *_mem, uint64_t address)
{
   struct aub_mem *mem = _mem;
   struct gen_batch_decode_bo bo = {0};

   list_for_each_entry(struct bo_map, i, &mem->maps, link)
      if (i->ppgtt && i->bo.addr <= address && i->bo.addr + i->bo.size > address)
         return i->bo;

   address &= ~0xfff;

   if (!ppgtt_mapped(mem, mem->pml4, address))
      return bo;

   /* Map everything until the first gap since we don't know how much the
    * decoder actually needs.
    */
   uint64_t end = address;
   while (ppgtt_mapped(mem, mem->pml4, end))
      end += 4096;

   bo.addr = address;
   bo.size = end - address;
   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
   assert(bo.map != MAP_FAILED);

   for (uint64_t page = address; page < end; page += 4096) {
      struct phys_mem *phys_mem = ppgtt_walk(mem, mem->pml4, page);

      void *res = mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ,
                       MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset);
      assert(res != MAP_FAILED);
   }

   add_gtt_bo_map(mem, bo, true, true);

   return bo;
}

bool
aub_mem_init(struct aub_mem *mem)
{
   memset(mem, 0, sizeof(*mem));

   list_inithead(&mem->maps);

   mem->mem_fd = memfd_create("phys memory", 0);

   return mem->mem_fd != -1;
}

void
aub_mem_fini(struct aub_mem *mem)
{
   if (mem->mem_fd == -1)
      return;

   aub_mem_clear_bo_maps(mem);

   rb_tree_foreach_safe(struct ggtt_entry, entry, &mem->ggtt, node) {
      rb_tree_remove(&mem->ggtt, &entry->node);
      free(entry);
   }
   rb_tree_foreach_safe(struct phys_mem, entry, &mem->mem, node) {
      rb_tree_remove(&mem->mem, &entry->node);
      free(entry);
   }

   close(mem->mem_fd);
   mem->mem_fd = -1;
}

struct gen_batch_decode_bo
aub_mem_get_phys_addr_data(struct aub_mem *mem, uint64_t phys_addr)
{
   struct phys_mem *page = search_phys_mem(mem, phys_addr);
   return page ?
      (struct gen_batch_decode_bo) { .map = page->data, .addr = page->phys_addr, .size = 4096 } :
      (struct gen_batch_decode_bo) {};
}

struct gen_batch_decode_bo
aub_mem_get_ppgtt_addr_data(struct aub_mem *mem, uint64_t virt_addr)
{
   struct phys_mem *page = ppgtt_walk(mem, mem->pml4, virt_addr);
   return page ?
      (struct gen_batch_decode_bo) { .map = page->data, .addr = virt_addr & ~((1ULL << 12) - 1), .size = 4096 } :
      (struct gen_batch_decode_bo) {};
}

struct gen_batch_decode_bo
aub_mem_get_ppgtt_addr_aub_data(struct aub_mem *mem, uint64_t virt_addr)
{
   struct phys_mem *page = ppgtt_walk(mem, mem->pml4, virt_addr);
   return page ?
      (struct gen_batch_decode_bo) { .map = page->aub_data, .addr = virt_addr & ~((1ULL << 12) - 1), .size = 4096 } :
      (struct gen_batch_decode_bo) {};
}
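/* A minimal usage sketch (not part of the original file): roughly how an
 * AUB-trace consumer such as a batch decoder might drive the functions
 * above, assuming it already parses the trace into memory-write and
 * batch-start events.  The buffers (pte, batch_data, batch_size) and the
 * addresses are hypothetical; only the aub_mem_* calls and
 * struct gen_batch_decode_bo come from this file.
 *
 *    struct aub_mem mem;
 *    if (!aub_mem_init(&mem))
 *       return;
 *
 *    // One 8-byte GGTT entry at offset 0 maps virtual page 0 to the
 *    // physical page named by the entry value (phys 0x2000, valid bit set).
 *    uint64_t pte = 0x2000 | 1;
 *    aub_mem_ggtt_entry_write(&mem, 0, &pte, sizeof(pte));
 *
 *    // Data written through the GGTT lands in the backing physical page
 *    // (at most 4KiB here, since only one page is mapped).
 *    aub_mem_ggtt_write(&mem, 0, batch_data, batch_size);
 *
 *    // Hand the decoder a contiguous read-only view of GGTT memory.
 *    struct gen_batch_decode_bo bo = aub_mem_get_ggtt_bo(&mem, 0);
 *
 *    // Drop temporary mappings between batches, tear down at end of trace.
 *    aub_mem_clear_bo_maps(&mem);
 *    aub_mem_fini(&mem);
 */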