1/* 2 * Copyright (c) 2018 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** 25 * The aux map provides a multi-level lookup of the main surface address which 26 * ends up providing information about the auxiliary surface data, including 27 * the address where the auxiliary data resides. 28 * 29 * The 48-bit VMA (GPU) address of the main surface is split to do the address 30 * lookup: 31 * 32 * 48 bit address of main surface 33 * +--------+--------+--------+------+ 34 * | 47:36 | 35:24 | 23:16 | 15:0 | 35 * | L3-idx | L2-idx | L1-idx | ... | 36 * +--------+--------+--------+------+ 37 * 38 * The GFX_AUX_TABLE_BASE_ADDR points to a buffer. The L3 Table Entry is 39 * located by indexing into this buffer as a uint64_t array using the L3-idx 40 * value. The 64-bit L3 entry is defined as: 41 * 42 * +-------+-------------+------+---+ 43 * | 63:48 | 47:15 | 14:1 | 0 | 44 * | ... | L2-tbl-addr | ... | V | 45 * +-------+-------------+------+---+ 46 * 47 * If the `V` (valid) bit is set, then the L2-tbl-addr gives the address for 48 * the level-2 table entries, with the lower address bits filled with zero. 49 * The L2 Table Entry is located by indexing into this buffer as a uint64_t 50 * array using the L2-idx value. The 64-bit L2 entry is similar to the L3 51 * entry, except with 2 additional address bits: 52 * 53 * +-------+-------------+------+---+ 54 * | 63:48 | 47:13 | 12:1 | 0 | 55 * | ... | L1-tbl-addr | ... | V | 56 * +-------+-------------+------+---+ 57 * 58 * If the `V` bit is set, then the L1-tbl-addr gives the address for the 59 * level-1 table entries, with the lower address bits filled with zero. The L1 60 * Table Entry is located by indexing into this buffer as a uint64_t array 61 * using the L1-idx value. The 64-bit L1 entry is defined as: 62 * 63 * +--------+------+-------+-------+-------+---------------+-----+---+ 64 * | 63:58 | 57 | 56:54 | 53:52 | 51:48 | 47:8 | 7:1 | 0 | 65 * | Format | Y/Cr | Depth | TM | ... | aux-data-addr | ... | V | 66 * +--------+------+-------+-------+-------+---------------+-----+---+ 67 * 68 * Where: 69 * - Format: See `get_format_encoding` 70 * - Y/Cr: 0=Y(Luma), 1=Cr(Chroma) 71 * - (bit) Depth: See `get_bpp_encoding` 72 * - TM (Tile-mode): 0=Ys, 1=Y, 2=rsvd, 3=rsvd 73 * - aux-data-addr: VMA/GPU address for the aux-data 74 * - V: entry is valid 75 */ 76 77#include "intel_aux_map.h" 78#include "intel_gem.h" 79 80#include "dev/intel_device_info.h" 81#include "isl/isl.h" 82 83#include "drm-uapi/i915_drm.h" 84#include "util/list.h" 85#include "util/ralloc.h" 86#include "util/u_atomic.h" 87#include "main/macros.h" 88 89#include <inttypes.h> 90#include <stdlib.h> 91#include <stdio.h> 92#include <pthread.h> 93 94static const bool aux_map_debug = false; 95 96struct aux_map_buffer { 97 struct list_head link; 98 struct intel_buffer *buffer; 99}; 100 101struct intel_aux_map_context { 102 void *driver_ctx; 103 pthread_mutex_t mutex; 104 struct intel_mapped_pinned_buffer_alloc *buffer_alloc; 105 uint32_t num_buffers; 106 struct list_head buffers; 107 uint64_t level3_base_addr; 108 uint64_t *level3_map; 109 uint32_t tail_offset, tail_remaining; 110 uint32_t state_num; 111}; 112 113static bool 114add_buffer(struct intel_aux_map_context *ctx) 115{ 116 struct aux_map_buffer *buf = ralloc(ctx, struct aux_map_buffer); 117 if (!buf) 118 return false; 119 120 const uint32_t size = 0x100000; 121 buf->buffer = ctx->buffer_alloc->alloc(ctx->driver_ctx, size); 122 if (!buf->buffer) { 123 ralloc_free(buf); 124 return false; 125 } 126 127 assert(buf->buffer->map != NULL); 128 129 list_addtail(&buf->link, &ctx->buffers); 130 ctx->tail_offset = 0; 131 ctx->tail_remaining = size; 132 p_atomic_inc(&ctx->num_buffers); 133 134 return true; 135} 136 137static void 138advance_current_pos(struct intel_aux_map_context *ctx, uint32_t size) 139{ 140 assert(ctx->tail_remaining >= size); 141 ctx->tail_remaining -= size; 142 ctx->tail_offset += size; 143} 144 145static bool 146align_and_verify_space(struct intel_aux_map_context *ctx, uint32_t size, 147 uint32_t align) 148{ 149 if (ctx->tail_remaining < size) 150 return false; 151 152 struct aux_map_buffer *tail = 153 list_last_entry(&ctx->buffers, struct aux_map_buffer, link); 154 uint64_t gpu = tail->buffer->gpu + ctx->tail_offset; 155 uint64_t aligned = align64(gpu, align); 156 157 if ((aligned - gpu) + size > ctx->tail_remaining) { 158 return false; 159 } else { 160 if (aligned - gpu > 0) 161 advance_current_pos(ctx, aligned - gpu); 162 return true; 163 } 164} 165 166static void 167get_current_pos(struct intel_aux_map_context *ctx, uint64_t *gpu, uint64_t **map) 168{ 169 assert(!list_is_empty(&ctx->buffers)); 170 struct aux_map_buffer *tail = 171 list_last_entry(&ctx->buffers, struct aux_map_buffer, link); 172 if (gpu) 173 *gpu = tail->buffer->gpu + ctx->tail_offset; 174 if (map) 175 *map = (uint64_t*)((uint8_t*)tail->buffer->map + ctx->tail_offset); 176} 177 178static bool 179add_sub_table(struct intel_aux_map_context *ctx, uint32_t size, 180 uint32_t align, uint64_t *gpu, uint64_t **map) 181{ 182 if (!align_and_verify_space(ctx, size, align)) { 183 if (!add_buffer(ctx)) 184 return false; 185 UNUSED bool aligned = align_and_verify_space(ctx, size, align); 186 assert(aligned); 187 } 188 get_current_pos(ctx, gpu, map); 189 memset(*map, 0, size); 190 advance_current_pos(ctx, size); 191 return true; 192} 193 194uint32_t 195intel_aux_map_get_state_num(struct intel_aux_map_context *ctx) 196{ 197 return p_atomic_read(&ctx->state_num); 198} 199 200struct intel_aux_map_context * 201intel_aux_map_init(void *driver_ctx, 202 struct intel_mapped_pinned_buffer_alloc *buffer_alloc, 203 const struct intel_device_info *devinfo) 204{ 205 struct intel_aux_map_context *ctx; 206 if (devinfo->ver < 12) 207 return NULL; 208 209 ctx = ralloc(NULL, struct intel_aux_map_context); 210 if (!ctx) 211 return NULL; 212 213 if (pthread_mutex_init(&ctx->mutex, NULL)) 214 return NULL; 215 216 ctx->driver_ctx = driver_ctx; 217 ctx->buffer_alloc = buffer_alloc; 218 ctx->num_buffers = 0; 219 list_inithead(&ctx->buffers); 220 ctx->tail_offset = 0; 221 ctx->tail_remaining = 0; 222 ctx->state_num = 0; 223 224 if (add_sub_table(ctx, 32 * 1024, 32 * 1024, &ctx->level3_base_addr, 225 &ctx->level3_map)) { 226 if (aux_map_debug) 227 fprintf(stderr, "AUX-MAP L3: 0x%"PRIx64", map=%p\n", 228 ctx->level3_base_addr, ctx->level3_map); 229 p_atomic_inc(&ctx->state_num); 230 return ctx; 231 } else { 232 ralloc_free(ctx); 233 return NULL; 234 } 235} 236 237void 238intel_aux_map_finish(struct intel_aux_map_context *ctx) 239{ 240 if (!ctx) 241 return; 242 243 pthread_mutex_destroy(&ctx->mutex); 244 list_for_each_entry_safe(struct aux_map_buffer, buf, &ctx->buffers, link) { 245 ctx->buffer_alloc->free(ctx->driver_ctx, buf->buffer); 246 list_del(&buf->link); 247 p_atomic_dec(&ctx->num_buffers); 248 ralloc_free(buf); 249 } 250 251 ralloc_free(ctx); 252} 253 254uint64_t 255intel_aux_map_get_base(struct intel_aux_map_context *ctx) 256{ 257 /** 258 * This get initialized in intel_aux_map_init, and never changes, so there is 259 * no need to lock the mutex. 260 */ 261 return ctx->level3_base_addr; 262} 263 264static struct aux_map_buffer * 265find_buffer(struct intel_aux_map_context *ctx, uint64_t addr) 266{ 267 list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) { 268 if (buf->buffer->gpu <= addr && buf->buffer->gpu_end > addr) { 269 return buf; 270 } 271 } 272 return NULL; 273} 274 275static uint64_t * 276get_u64_entry_ptr(struct intel_aux_map_context *ctx, uint64_t addr) 277{ 278 struct aux_map_buffer *buf = find_buffer(ctx, addr); 279 assert(buf); 280 uintptr_t map_offset = addr - buf->buffer->gpu; 281 return (uint64_t*)((uint8_t*)buf->buffer->map + map_offset); 282} 283 284static uint8_t 285get_bpp_encoding(enum isl_format format) 286{ 287 if (isl_format_is_yuv(format)) { 288 switch (format) { 289 case ISL_FORMAT_YCRCB_NORMAL: 290 case ISL_FORMAT_YCRCB_SWAPY: 291 case ISL_FORMAT_PLANAR_420_8: return 3; 292 case ISL_FORMAT_PLANAR_420_12: return 2; 293 case ISL_FORMAT_PLANAR_420_10: return 1; 294 case ISL_FORMAT_PLANAR_420_16: return 0; 295 default: 296 unreachable("Unsupported format!"); 297 return 0; 298 } 299 } else { 300 switch (isl_format_get_layout(format)->bpb) { 301 case 16: return 0; 302 case 8: return 4; 303 case 32: return 5; 304 case 64: return 6; 305 case 128: return 7; 306 default: 307 unreachable("Unsupported bpp!"); 308 return 0; 309 } 310 } 311} 312 313#define INTEL_AUX_MAP_ENTRY_Y_TILED_BIT (0x1ull << 52) 314 315uint64_t 316intel_aux_map_format_bits(enum isl_tiling tiling, enum isl_format format, 317 uint8_t plane) 318{ 319 if (aux_map_debug) 320 fprintf(stderr, "AUX-MAP entry %s, bpp_enc=%d\n", 321 isl_format_get_name(format), 322 isl_format_get_aux_map_encoding(format)); 323 324 assert(isl_tiling_is_any_y(tiling)); 325 326 uint64_t format_bits = 327 ((uint64_t)isl_format_get_aux_map_encoding(format) << 58) | 328 ((uint64_t)(plane > 0) << 57) | 329 ((uint64_t)get_bpp_encoding(format) << 54) | 330 INTEL_AUX_MAP_ENTRY_Y_TILED_BIT; 331 332 assert((format_bits & INTEL_AUX_MAP_FORMAT_BITS_MASK) == format_bits); 333 334 return format_bits; 335} 336 337uint64_t 338intel_aux_map_format_bits_for_isl_surf(const struct isl_surf *isl_surf) 339{ 340 assert(!isl_format_is_planar(isl_surf->format)); 341 return intel_aux_map_format_bits(isl_surf->tiling, isl_surf->format, 0); 342} 343 344static void 345get_aux_entry(struct intel_aux_map_context *ctx, uint64_t address, 346 uint32_t *l1_index_out, uint64_t *l1_entry_addr_out, 347 uint64_t **l1_entry_map_out) 348{ 349 uint32_t l3_index = (address >> 36) & 0xfff; 350 uint64_t *l3_entry = &ctx->level3_map[l3_index]; 351 352 uint64_t *l2_map; 353 if ((*l3_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 354 uint64_t l2_gpu; 355 if (add_sub_table(ctx, 32 * 1024, 32 * 1024, &l2_gpu, &l2_map)) { 356 if (aux_map_debug) 357 fprintf(stderr, "AUX-MAP L3[0x%x]: 0x%"PRIx64", map=%p\n", 358 l3_index, l2_gpu, l2_map); 359 } else { 360 unreachable("Failed to add L2 Aux-Map Page Table!"); 361 } 362 *l3_entry = (l2_gpu & 0xffffffff8000ULL) | 1; 363 } else { 364 uint64_t l2_addr = intel_canonical_address(*l3_entry & ~0x7fffULL); 365 l2_map = get_u64_entry_ptr(ctx, l2_addr); 366 } 367 uint32_t l2_index = (address >> 24) & 0xfff; 368 uint64_t *l2_entry = &l2_map[l2_index]; 369 370 uint64_t l1_addr, *l1_map; 371 if ((*l2_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 372 if (add_sub_table(ctx, 8 * 1024, 8 * 1024, &l1_addr, &l1_map)) { 373 if (aux_map_debug) 374 fprintf(stderr, "AUX-MAP L2[0x%x]: 0x%"PRIx64", map=%p\n", 375 l2_index, l1_addr, l1_map); 376 } else { 377 unreachable("Failed to add L1 Aux-Map Page Table!"); 378 } 379 *l2_entry = (l1_addr & 0xffffffffe000ULL) | 1; 380 } else { 381 l1_addr = intel_canonical_address(*l2_entry & ~0x1fffULL); 382 l1_map = get_u64_entry_ptr(ctx, l1_addr); 383 } 384 uint32_t l1_index = (address >> 16) & 0xff; 385 if (l1_index_out) 386 *l1_index_out = l1_index; 387 if (l1_entry_addr_out) 388 *l1_entry_addr_out = l1_addr + l1_index * sizeof(*l1_map); 389 if (l1_entry_map_out) 390 *l1_entry_map_out = &l1_map[l1_index]; 391} 392 393static void 394add_mapping(struct intel_aux_map_context *ctx, uint64_t address, 395 uint64_t aux_address, uint64_t format_bits, 396 bool *state_changed) 397{ 398 if (aux_map_debug) 399 fprintf(stderr, "AUX-MAP 0x%"PRIx64" => 0x%"PRIx64"\n", address, 400 aux_address); 401 402 uint32_t l1_index; 403 uint64_t *l1_entry; 404 get_aux_entry(ctx, address, &l1_index, NULL, &l1_entry); 405 406 const uint64_t l1_data = 407 (aux_address & INTEL_AUX_MAP_ADDRESS_MASK) | 408 format_bits | 409 INTEL_AUX_MAP_ENTRY_VALID_BIT; 410 411 const uint64_t current_l1_data = *l1_entry; 412 if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 413 assert((aux_address & 0xffULL) == 0); 414 if (aux_map_debug) 415 fprintf(stderr, "AUX-MAP L1[0x%x] 0x%"PRIx64" -> 0x%"PRIx64"\n", 416 l1_index, current_l1_data, l1_data); 417 /** 418 * We use non-zero bits in 63:1 to indicate the entry had been filled 419 * previously. If these bits are non-zero and they don't exactly match 420 * what we want to program into the entry, then we must force the 421 * aux-map tables to be flushed. 422 */ 423 if (current_l1_data != 0 && \ 424 (current_l1_data | INTEL_AUX_MAP_ENTRY_VALID_BIT) != l1_data) 425 *state_changed = true; 426 *l1_entry = l1_data; 427 } else { 428 if (aux_map_debug) 429 fprintf(stderr, "AUX-MAP L1[0x%x] is already marked valid!\n", 430 l1_index); 431 assert(*l1_entry == l1_data); 432 } 433} 434 435uint64_t * 436intel_aux_map_get_entry(struct intel_aux_map_context *ctx, 437 uint64_t address, 438 uint64_t *entry_address) 439{ 440 pthread_mutex_lock(&ctx->mutex); 441 uint64_t *l1_entry_map; 442 get_aux_entry(ctx, address, NULL, entry_address, &l1_entry_map); 443 pthread_mutex_unlock(&ctx->mutex); 444 445 return l1_entry_map; 446} 447 448void 449intel_aux_map_add_mapping(struct intel_aux_map_context *ctx, uint64_t address, 450 uint64_t aux_address, uint64_t main_size_B, 451 uint64_t format_bits) 452{ 453 bool state_changed = false; 454 pthread_mutex_lock(&ctx->mutex); 455 uint64_t map_addr = address; 456 uint64_t dest_aux_addr = aux_address; 457 assert(align64(address, INTEL_AUX_MAP_MAIN_PAGE_SIZE) == address); 458 assert(align64(aux_address, INTEL_AUX_MAP_AUX_PAGE_SIZE) == aux_address); 459 while (map_addr - address < main_size_B) { 460 add_mapping(ctx, map_addr, dest_aux_addr, format_bits, &state_changed); 461 map_addr += INTEL_AUX_MAP_MAIN_PAGE_SIZE; 462 dest_aux_addr += INTEL_AUX_MAP_AUX_PAGE_SIZE; 463 } 464 pthread_mutex_unlock(&ctx->mutex); 465 if (state_changed) 466 p_atomic_inc(&ctx->state_num); 467} 468 469/** 470 * We mark the leaf entry as invalid, but we don't attempt to cleanup the 471 * other levels of translation mappings. Since we attempt to re-use VMA 472 * ranges, hopefully this will not lead to unbounded growth of the translation 473 * tables. 474 */ 475static void 476remove_mapping(struct intel_aux_map_context *ctx, uint64_t address, 477 bool *state_changed) 478{ 479 uint32_t l3_index = (address >> 36) & 0xfff; 480 uint64_t *l3_entry = &ctx->level3_map[l3_index]; 481 482 uint64_t *l2_map; 483 if ((*l3_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 484 return; 485 } else { 486 uint64_t l2_addr = intel_canonical_address(*l3_entry & ~0x7fffULL); 487 l2_map = get_u64_entry_ptr(ctx, l2_addr); 488 } 489 uint32_t l2_index = (address >> 24) & 0xfff; 490 uint64_t *l2_entry = &l2_map[l2_index]; 491 492 uint64_t *l1_map; 493 if ((*l2_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 494 return; 495 } else { 496 uint64_t l1_addr = intel_canonical_address(*l2_entry & ~0x1fffULL); 497 l1_map = get_u64_entry_ptr(ctx, l1_addr); 498 } 499 uint32_t l1_index = (address >> 16) & 0xff; 500 uint64_t *l1_entry = &l1_map[l1_index]; 501 502 const uint64_t current_l1_data = *l1_entry; 503 const uint64_t l1_data = current_l1_data & ~1ull; 504 505 if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 506 return; 507 } else { 508 if (aux_map_debug) 509 fprintf(stderr, "AUX-MAP [0x%x][0x%x][0x%x] L1 entry removed!\n", 510 l3_index, l2_index, l1_index); 511 /** 512 * We use non-zero bits in 63:1 to indicate the entry had been filled 513 * previously. In the unlikely event that these are all zero, we force a 514 * flush of the aux-map tables. 515 */ 516 if (unlikely(l1_data == 0)) 517 *state_changed = true; 518 *l1_entry = l1_data; 519 } 520} 521 522void 523intel_aux_map_unmap_range(struct intel_aux_map_context *ctx, uint64_t address, 524 uint64_t size) 525{ 526 bool state_changed = false; 527 pthread_mutex_lock(&ctx->mutex); 528 if (aux_map_debug) 529 fprintf(stderr, "AUX-MAP remove 0x%"PRIx64"-0x%"PRIx64"\n", address, 530 address + size); 531 532 uint64_t map_addr = address; 533 assert(align64(address, INTEL_AUX_MAP_MAIN_PAGE_SIZE) == address); 534 while (map_addr - address < size) { 535 remove_mapping(ctx, map_addr, &state_changed); 536 map_addr += 64 * 1024; 537 } 538 pthread_mutex_unlock(&ctx->mutex); 539 if (state_changed) 540 p_atomic_inc(&ctx->state_num); 541} 542 543uint32_t 544intel_aux_map_get_num_buffers(struct intel_aux_map_context *ctx) 545{ 546 return p_atomic_read(&ctx->num_buffers); 547} 548 549void 550intel_aux_map_fill_bos(struct intel_aux_map_context *ctx, void **driver_bos, 551 uint32_t max_bos) 552{ 553 assert(p_atomic_read(&ctx->num_buffers) >= max_bos); 554 uint32_t i = 0; 555 list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) { 556 if (i >= max_bos) 557 return; 558 driver_bos[i++] = buf->buffer->driver_bo; 559 } 560} 561