/*
 * Copyright © 2016-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "aub_mem.h"

#ifndef HAVE_MEMFD_CREATE
#include <sys/syscall.h>

static inline int
memfd_create(const char *name, unsigned int flags)
{
   return syscall(SYS_memfd_create, name, flags);
}
#endif

struct bo_map {
   struct list_head link;
   struct gen_batch_decode_bo bo;
   bool unmap_after_use;
   bool ppgtt;
};

struct ggtt_entry {
   struct rb_node node;
   uint64_t virt_addr;
   uint64_t phys_addr;
};

struct phys_mem {
   struct rb_node node;
   uint64_t fd_offset;
   uint64_t phys_addr;
   uint8_t *data;
   const uint8_t *aub_data;
};

static void
add_gtt_bo_map(struct aub_mem *mem, struct gen_batch_decode_bo bo, bool ppgtt, bool unmap_after_use)
{
   struct bo_map *m = calloc(1, sizeof(*m));

   m->ppgtt = ppgtt;
   m->bo = bo;
   m->unmap_after_use = unmap_after_use;
   list_add(&m->link, &mem->maps);
}

void
aub_mem_clear_bo_maps(struct aub_mem *mem)
{
   list_for_each_entry_safe(struct bo_map, i, &mem->maps, link) {
      if (i->unmap_after_use)
         munmap((void *)i->bo.map, i->bo.size);
      list_del(&i->link);
      free(i);
   }
}

static inline struct ggtt_entry *
ggtt_entry_next(struct ggtt_entry *entry)
{
   if (!entry)
      return NULL;
   struct rb_node *node = rb_node_next(&entry->node);
   if (!node)
      return NULL;
   return rb_node_data(struct ggtt_entry, node, node);
}

static inline int
cmp_uint64(uint64_t a, uint64_t b)
{
   if (a < b)
      return -1;
   if (a > b)
      return 1;
   return 0;
}

static inline int
cmp_ggtt_entry(const struct rb_node *node, const void *addr)
{
   struct ggtt_entry *entry = rb_node_data(struct ggtt_entry, node, node);
   return cmp_uint64(entry->virt_addr, *(const uint64_t *)addr);
}

static struct ggtt_entry *
ensure_ggtt_entry(struct aub_mem *mem, uint64_t virt_addr)
{
   /* A "sloppy" search returns the closest node, which then serves as the
    * insertion point when no exact match exists.
    */
   struct rb_node *node = rb_tree_search_sloppy(&mem->ggtt, &virt_addr,
                                                cmp_ggtt_entry);
   int cmp = 0;
   if (!node || (cmp = cmp_ggtt_entry(node, &virt_addr))) {
      struct ggtt_entry *new_entry = calloc(1, sizeof(*new_entry));
      new_entry->virt_addr = virt_addr;
      rb_tree_insert_at(&mem->ggtt, node, &new_entry->node, cmp > 0);
      node = &new_entry->node;
   }

   return rb_node_data(struct ggtt_entry, node, node);
}
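
/* The GGTT is tracked as a sparse rb-tree keyed by virtual address: one
 * ggtt_entry per 4KiB page, holding the raw PTE value captured from the AUB
 * stream (physical page address plus the validity bit in bit 0).
 * search_ggtt_entry() below returns the entry covering the page that
 * contains virt_addr, or NULL if that page was never written.
 */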

static struct ggtt_entry *
search_ggtt_entry(struct aub_mem *mem, uint64_t virt_addr)
{
   virt_addr &= ~0xfff;

   struct rb_node *node = rb_tree_search(&mem->ggtt, &virt_addr, cmp_ggtt_entry);

   if (!node)
      return NULL;

   return rb_node_data(struct ggtt_entry, node, node);
}

static inline int
cmp_phys_mem(const struct rb_node *node, const void *addr)
{
   struct phys_mem *mem = rb_node_data(struct phys_mem, node, node);
   return cmp_uint64(mem->phys_addr, *(uint64_t *)addr);
}

static struct phys_mem *
ensure_phys_mem(struct aub_mem *mem, uint64_t phys_addr)
{
   struct rb_node *node = rb_tree_search_sloppy(&mem->mem, &phys_addr, cmp_phys_mem);
   int cmp = 0;
   if (!node || (cmp = cmp_phys_mem(node, &phys_addr))) {
      struct phys_mem *new_mem = calloc(1, sizeof(*new_mem));
      new_mem->phys_addr = phys_addr;
      new_mem->fd_offset = mem->mem_fd_len;

      /* Grow the backing memfd by one page and map that page read/write. */
      MAYBE_UNUSED int ftruncate_res = ftruncate(mem->mem_fd, mem->mem_fd_len += 4096);
      assert(ftruncate_res == 0);

      new_mem->data = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                           mem->mem_fd, new_mem->fd_offset);
      assert(new_mem->data != MAP_FAILED);

      rb_tree_insert_at(&mem->mem, node, &new_mem->node, cmp > 0);
      node = &new_mem->node;
   }

   return rb_node_data(struct phys_mem, node, node);
}

static struct phys_mem *
search_phys_mem(struct aub_mem *mem, uint64_t phys_addr)
{
   phys_addr &= ~0xfff;

   struct rb_node *node = rb_tree_search(&mem->mem, &phys_addr, cmp_phys_mem);

   if (!node)
      return NULL;

   return rb_node_data(struct phys_mem, node, node);
}

void
aub_mem_local_write(void *_mem, uint64_t address,
                    const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   struct gen_batch_decode_bo bo = {
      .map = data,
      .addr = address,
      .size = size,
   };
   add_gtt_bo_map(mem, bo, false, false);
}

void
aub_mem_ggtt_entry_write(void *_mem, uint64_t address,
                         const void *_data, uint32_t _size)
{
   struct aub_mem *mem = _mem;
   /* The write address is the byte offset of the first PTE within the GGTT;
    * each 64-bit entry maps one 4KiB page of the virtual address space.
    */
   uint64_t virt_addr = (address / sizeof(uint64_t)) << 12;
   const uint64_t *data = _data;
   size_t size = _size / sizeof(*data);
   for (const uint64_t *entry = data;
        entry < data + size;
        entry++, virt_addr += 4096) {
      struct ggtt_entry *pt = ensure_ggtt_entry(mem, virt_addr);
      pt->phys_addr = *entry;
   }
}

void
aub_mem_phys_write(void *_mem, uint64_t phys_address,
                   const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   uint32_t to_write = size;
   for (uint64_t page = phys_address & ~0xfff; page < phys_address + size; page += 4096) {
      struct phys_mem *pmem = ensure_phys_mem(mem, page);
      uint64_t offset = MAX2(page, phys_address) - page;
      uint32_t size_this_page = MIN2(to_write, 4096 - offset);
      to_write -= size_this_page;
      memcpy(pmem->data + offset, data, size_this_page);
      pmem->aub_data = data - offset;
      data = (const uint8_t *)data + size_this_page;
   }
}

void
aub_mem_ggtt_write(void *_mem, uint64_t virt_address,
                   const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   uint32_t to_write = size;
   for (uint64_t page = virt_address & ~0xfff; page < virt_address + size; page += 4096) {
      struct ggtt_entry *entry = search_ggtt_entry(mem, page);
      assert(entry && entry->phys_addr & 0x1);

      uint64_t offset = MAX2(page, virt_address) - page;
      uint32_t size_this_page = MIN2(to_write, 4096 - offset);
      to_write -= size_this_page;

      uint64_t phys_page = entry->phys_addr & ~0xfff; /* Clear the validity bits. */
      aub_mem_phys_write(mem, phys_page + offset, data, size_this_page);
      data = (const uint8_t *)data + size_this_page;
   }
}
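
/* aub_mem_get_ggtt_bo() below resolves a GGTT address for the decoder: after
 * checking already-established maps, it collects the run of consecutive GGTT
 * entries starting at the requested page, reserves an anonymous read-only
 * mapping covering that range, then overlays each 4KiB page with a MAP_FIXED
 * mapping of the corresponding page of the backing memfd.
 */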

struct gen_batch_decode_bo
aub_mem_get_ggtt_bo(void *_mem, uint64_t address)
{
   struct aub_mem *mem = _mem;
   struct gen_batch_decode_bo bo = {0};

   list_for_each_entry(struct bo_map, i, &mem->maps, link)
      if (!i->ppgtt && i->bo.addr <= address && i->bo.addr + i->bo.size > address)
         return i->bo;

   address &= ~0xfff;

   struct ggtt_entry *start =
      (struct ggtt_entry *)rb_tree_search_sloppy(&mem->ggtt, &address,
                                                 cmp_ggtt_entry);
   if (start && start->virt_addr < address)
      start = ggtt_entry_next(start);
   if (!start)
      return bo;

   struct ggtt_entry *last = start;
   for (struct ggtt_entry *i = ggtt_entry_next(last);
        i && last->virt_addr + 4096 == i->virt_addr;
        last = i, i = ggtt_entry_next(last))
      ;

   bo.addr = MIN2(address, start->virt_addr);
   bo.size = last->virt_addr - bo.addr + 4096;
   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
   assert(bo.map != MAP_FAILED);

   for (struct ggtt_entry *i = start;
        i;
        i = i == last ? NULL : ggtt_entry_next(i)) {
      uint64_t phys_addr = i->phys_addr & ~0xfff;
      struct phys_mem *phys_mem = search_phys_mem(mem, phys_addr);

      if (!phys_mem)
         continue;

      uint32_t map_offset = i->virt_addr - address;
      MAYBE_UNUSED void *res =
         mmap((uint8_t *)bo.map + map_offset, 4096, PROT_READ,
              MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset);
      assert(res != MAP_FAILED);
   }

   add_gtt_bo_map(mem, bo, false, true);

   return bo;
}
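
/* ppgtt_walk() below resolves a 48-bit virtual address through the four-level
 * page tables: starting from the PML4, it consumes 9 address bits per level,
 * treats bit 0 of each entry as the present bit, and returns the phys_mem
 * page backing the final entry, or NULL at the first missing table or entry.
 */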

static struct phys_mem *
ppgtt_walk(struct aub_mem *mem, uint64_t pml4, uint64_t address)
{
   uint64_t shift = 39;
   uint64_t addr = pml4;
   for (int level = 4; level > 0; level--) {
      struct phys_mem *table = search_phys_mem(mem, addr);
      if (!table)
         return NULL;
      int index = (address >> shift) & 0x1ff;
      uint64_t entry = ((uint64_t *)table->data)[index];
      if (!(entry & 1))
         return NULL;
      addr = entry & ~0xfff;
      shift -= 9;
   }
   return search_phys_mem(mem, addr);
}

static bool
ppgtt_mapped(struct aub_mem *mem, uint64_t pml4, uint64_t address)
{
   return ppgtt_walk(mem, pml4, address) != NULL;
}

struct gen_batch_decode_bo
aub_mem_get_ppgtt_bo(void *_mem, uint64_t address)
{
   struct aub_mem *mem = _mem;
   struct gen_batch_decode_bo bo = {0};

   list_for_each_entry(struct bo_map, i, &mem->maps, link)
      if (i->ppgtt && i->bo.addr <= address && i->bo.addr + i->bo.size > address)
         return i->bo;

   address &= ~0xfff;

   if (!ppgtt_mapped(mem, mem->pml4, address))
      return bo;

   /* Map everything until the first gap since we don't know how much the
    * decoder actually needs.
    */
   uint64_t end = address;
   while (ppgtt_mapped(mem, mem->pml4, end))
      end += 4096;

   bo.addr = address;
   bo.size = end - address;
   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
   assert(bo.map != MAP_FAILED);

   for (uint64_t page = address; page < end; page += 4096) {
      struct phys_mem *phys_mem = ppgtt_walk(mem, mem->pml4, page);

      MAYBE_UNUSED void *res =
         mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ,
              MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset);
      assert(res != MAP_FAILED);
   }

   add_gtt_bo_map(mem, bo, true, true);

   return bo;
}

bool
aub_mem_init(struct aub_mem *mem)
{
   memset(mem, 0, sizeof(*mem));

   list_inithead(&mem->maps);

   mem->mem_fd = memfd_create("phys memory", 0);

   return mem->mem_fd != -1;
}

void
aub_mem_fini(struct aub_mem *mem)
{
   if (mem->mem_fd == -1)
      return;

   aub_mem_clear_bo_maps(mem);

   rb_tree_foreach_safe(struct ggtt_entry, entry, &mem->ggtt, node) {
      rb_tree_remove(&mem->ggtt, &entry->node);
      free(entry);
   }
   rb_tree_foreach_safe(struct phys_mem, entry, &mem->mem, node) {
      rb_tree_remove(&mem->mem, &entry->node);
      free(entry);
   }

   close(mem->mem_fd);
   mem->mem_fd = -1;
}

struct gen_batch_decode_bo
aub_mem_get_phys_addr_data(struct aub_mem *mem, uint64_t phys_addr)
{
   struct phys_mem *page = search_phys_mem(mem, phys_addr);
   return page ?
      (struct gen_batch_decode_bo) { .map = page->data, .addr = page->phys_addr, .size = 4096 } :
      (struct gen_batch_decode_bo) {};
}

struct gen_batch_decode_bo
aub_mem_get_ppgtt_addr_data(struct aub_mem *mem, uint64_t virt_addr)
{
   struct phys_mem *page = ppgtt_walk(mem, mem->pml4, virt_addr);
   return page ?
      (struct gen_batch_decode_bo) { .map = page->data, .addr = virt_addr & ~((1ULL << 12) - 1), .size = 4096 } :
      (struct gen_batch_decode_bo) {};
}

struct gen_batch_decode_bo
aub_mem_get_ppgtt_addr_aub_data(struct aub_mem *mem, uint64_t virt_addr)
{
   struct phys_mem *page = ppgtt_walk(mem, mem->pml4, virt_addr);
   return page ?
      (struct gen_batch_decode_bo) { .map = page->aub_data, .addr = virt_addr & ~((1ULL << 12) - 1), .size = 4096 } :
      (struct gen_batch_decode_bo) {};
}
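
/* Rough call sequence, as implied by this file: aub_mem_init() creates the
 * backing memfd; the AUB parser then feeds memory updates through
 * aub_mem_phys_write(), aub_mem_ggtt_entry_write(), aub_mem_ggtt_write() and
 * aub_mem_local_write(); the batch decoder resolves addresses with
 * aub_mem_get_ggtt_bo() / aub_mem_get_ppgtt_bo(); and aub_mem_fini() tears
 * down the translation tables and backing store.
 */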