17ec681f3Smrg/* 27ec681f3Smrg * Copyright (c) 2018 Intel Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg/** 257ec681f3Smrg * The aux map provides a multi-level lookup of the main surface address which 267ec681f3Smrg * ends up providing information about the auxiliary surface data, including 277ec681f3Smrg * the address where the auxiliary data resides. 287ec681f3Smrg * 297ec681f3Smrg * The 48-bit VMA (GPU) address of the main surface is split to do the address 307ec681f3Smrg * lookup: 317ec681f3Smrg * 327ec681f3Smrg * 48 bit address of main surface 337ec681f3Smrg * +--------+--------+--------+------+ 347ec681f3Smrg * | 47:36 | 35:24 | 23:16 | 15:0 | 357ec681f3Smrg * | L3-idx | L2-idx | L1-idx | ... | 367ec681f3Smrg * +--------+--------+--------+------+ 377ec681f3Smrg * 387ec681f3Smrg * The GFX_AUX_TABLE_BASE_ADDR points to a buffer. The L3 Table Entry is 397ec681f3Smrg * located by indexing into this buffer as a uint64_t array using the L3-idx 407ec681f3Smrg * value. The 64-bit L3 entry is defined as: 417ec681f3Smrg * 427ec681f3Smrg * +-------+-------------+------+---+ 437ec681f3Smrg * | 63:48 | 47:15 | 14:1 | 0 | 447ec681f3Smrg * | ... | L2-tbl-addr | ... | V | 457ec681f3Smrg * +-------+-------------+------+---+ 467ec681f3Smrg * 477ec681f3Smrg * If the `V` (valid) bit is set, then the L2-tbl-addr gives the address for 487ec681f3Smrg * the level-2 table entries, with the lower address bits filled with zero. 497ec681f3Smrg * The L2 Table Entry is located by indexing into this buffer as a uint64_t 507ec681f3Smrg * array using the L2-idx value. The 64-bit L2 entry is similar to the L3 517ec681f3Smrg * entry, except with 2 additional address bits: 527ec681f3Smrg * 537ec681f3Smrg * +-------+-------------+------+---+ 547ec681f3Smrg * | 63:48 | 47:13 | 12:1 | 0 | 557ec681f3Smrg * | ... | L1-tbl-addr | ... | V | 567ec681f3Smrg * +-------+-------------+------+---+ 577ec681f3Smrg * 587ec681f3Smrg * If the `V` bit is set, then the L1-tbl-addr gives the address for the 597ec681f3Smrg * level-1 table entries, with the lower address bits filled with zero. The L1 607ec681f3Smrg * Table Entry is located by indexing into this buffer as a uint64_t array 617ec681f3Smrg * using the L1-idx value. The 64-bit L1 entry is defined as: 627ec681f3Smrg * 637ec681f3Smrg * +--------+------+-------+-------+-------+---------------+-----+---+ 647ec681f3Smrg * | 63:58 | 57 | 56:54 | 53:52 | 51:48 | 47:8 | 7:1 | 0 | 657ec681f3Smrg * | Format | Y/Cr | Depth | TM | ... | aux-data-addr | ... | V | 667ec681f3Smrg * +--------+------+-------+-------+-------+---------------+-----+---+ 677ec681f3Smrg * 687ec681f3Smrg * Where: 697ec681f3Smrg * - Format: See `get_format_encoding` 707ec681f3Smrg * - Y/Cr: 0=Y(Luma), 1=Cr(Chroma) 717ec681f3Smrg * - (bit) Depth: See `get_bpp_encoding` 727ec681f3Smrg * - TM (Tile-mode): 0=Ys, 1=Y, 2=rsvd, 3=rsvd 737ec681f3Smrg * - aux-data-addr: VMA/GPU address for the aux-data 747ec681f3Smrg * - V: entry is valid 757ec681f3Smrg */ 767ec681f3Smrg 777ec681f3Smrg#include "intel_aux_map.h" 787ec681f3Smrg#include "intel_gem.h" 797ec681f3Smrg 807ec681f3Smrg#include "dev/intel_device_info.h" 817ec681f3Smrg#include "isl/isl.h" 827ec681f3Smrg 837ec681f3Smrg#include "drm-uapi/i915_drm.h" 847ec681f3Smrg#include "util/list.h" 857ec681f3Smrg#include "util/ralloc.h" 867ec681f3Smrg#include "util/u_atomic.h" 877ec681f3Smrg#include "main/macros.h" 887ec681f3Smrg 897ec681f3Smrg#include <inttypes.h> 907ec681f3Smrg#include <stdlib.h> 917ec681f3Smrg#include <stdio.h> 927ec681f3Smrg#include <pthread.h> 937ec681f3Smrg 947ec681f3Smrgstatic const bool aux_map_debug = false; 957ec681f3Smrg 967ec681f3Smrgstruct aux_map_buffer { 977ec681f3Smrg struct list_head link; 987ec681f3Smrg struct intel_buffer *buffer; 997ec681f3Smrg}; 1007ec681f3Smrg 1017ec681f3Smrgstruct intel_aux_map_context { 1027ec681f3Smrg void *driver_ctx; 1037ec681f3Smrg pthread_mutex_t mutex; 1047ec681f3Smrg struct intel_mapped_pinned_buffer_alloc *buffer_alloc; 1057ec681f3Smrg uint32_t num_buffers; 1067ec681f3Smrg struct list_head buffers; 1077ec681f3Smrg uint64_t level3_base_addr; 1087ec681f3Smrg uint64_t *level3_map; 1097ec681f3Smrg uint32_t tail_offset, tail_remaining; 1107ec681f3Smrg uint32_t state_num; 1117ec681f3Smrg}; 1127ec681f3Smrg 1137ec681f3Smrgstatic bool 1147ec681f3Smrgadd_buffer(struct intel_aux_map_context *ctx) 1157ec681f3Smrg{ 1167ec681f3Smrg struct aux_map_buffer *buf = ralloc(ctx, struct aux_map_buffer); 1177ec681f3Smrg if (!buf) 1187ec681f3Smrg return false; 1197ec681f3Smrg 1207ec681f3Smrg const uint32_t size = 0x100000; 1217ec681f3Smrg buf->buffer = ctx->buffer_alloc->alloc(ctx->driver_ctx, size); 1227ec681f3Smrg if (!buf->buffer) { 1237ec681f3Smrg ralloc_free(buf); 1247ec681f3Smrg return false; 1257ec681f3Smrg } 1267ec681f3Smrg 1277ec681f3Smrg assert(buf->buffer->map != NULL); 1287ec681f3Smrg 1297ec681f3Smrg list_addtail(&buf->link, &ctx->buffers); 1307ec681f3Smrg ctx->tail_offset = 0; 1317ec681f3Smrg ctx->tail_remaining = size; 1327ec681f3Smrg p_atomic_inc(&ctx->num_buffers); 1337ec681f3Smrg 1347ec681f3Smrg return true; 1357ec681f3Smrg} 1367ec681f3Smrg 1377ec681f3Smrgstatic void 1387ec681f3Smrgadvance_current_pos(struct intel_aux_map_context *ctx, uint32_t size) 1397ec681f3Smrg{ 1407ec681f3Smrg assert(ctx->tail_remaining >= size); 1417ec681f3Smrg ctx->tail_remaining -= size; 1427ec681f3Smrg ctx->tail_offset += size; 1437ec681f3Smrg} 1447ec681f3Smrg 1457ec681f3Smrgstatic bool 1467ec681f3Smrgalign_and_verify_space(struct intel_aux_map_context *ctx, uint32_t size, 1477ec681f3Smrg uint32_t align) 1487ec681f3Smrg{ 1497ec681f3Smrg if (ctx->tail_remaining < size) 1507ec681f3Smrg return false; 1517ec681f3Smrg 1527ec681f3Smrg struct aux_map_buffer *tail = 1537ec681f3Smrg list_last_entry(&ctx->buffers, struct aux_map_buffer, link); 1547ec681f3Smrg uint64_t gpu = tail->buffer->gpu + ctx->tail_offset; 1557ec681f3Smrg uint64_t aligned = align64(gpu, align); 1567ec681f3Smrg 1577ec681f3Smrg if ((aligned - gpu) + size > ctx->tail_remaining) { 1587ec681f3Smrg return false; 1597ec681f3Smrg } else { 1607ec681f3Smrg if (aligned - gpu > 0) 1617ec681f3Smrg advance_current_pos(ctx, aligned - gpu); 1627ec681f3Smrg return true; 1637ec681f3Smrg } 1647ec681f3Smrg} 1657ec681f3Smrg 1667ec681f3Smrgstatic void 1677ec681f3Smrgget_current_pos(struct intel_aux_map_context *ctx, uint64_t *gpu, uint64_t **map) 1687ec681f3Smrg{ 1697ec681f3Smrg assert(!list_is_empty(&ctx->buffers)); 1707ec681f3Smrg struct aux_map_buffer *tail = 1717ec681f3Smrg list_last_entry(&ctx->buffers, struct aux_map_buffer, link); 1727ec681f3Smrg if (gpu) 1737ec681f3Smrg *gpu = tail->buffer->gpu + ctx->tail_offset; 1747ec681f3Smrg if (map) 1757ec681f3Smrg *map = (uint64_t*)((uint8_t*)tail->buffer->map + ctx->tail_offset); 1767ec681f3Smrg} 1777ec681f3Smrg 1787ec681f3Smrgstatic bool 1797ec681f3Smrgadd_sub_table(struct intel_aux_map_context *ctx, uint32_t size, 1807ec681f3Smrg uint32_t align, uint64_t *gpu, uint64_t **map) 1817ec681f3Smrg{ 1827ec681f3Smrg if (!align_and_verify_space(ctx, size, align)) { 1837ec681f3Smrg if (!add_buffer(ctx)) 1847ec681f3Smrg return false; 1857ec681f3Smrg UNUSED bool aligned = align_and_verify_space(ctx, size, align); 1867ec681f3Smrg assert(aligned); 1877ec681f3Smrg } 1887ec681f3Smrg get_current_pos(ctx, gpu, map); 1897ec681f3Smrg memset(*map, 0, size); 1907ec681f3Smrg advance_current_pos(ctx, size); 1917ec681f3Smrg return true; 1927ec681f3Smrg} 1937ec681f3Smrg 1947ec681f3Smrguint32_t 1957ec681f3Smrgintel_aux_map_get_state_num(struct intel_aux_map_context *ctx) 1967ec681f3Smrg{ 1977ec681f3Smrg return p_atomic_read(&ctx->state_num); 1987ec681f3Smrg} 1997ec681f3Smrg 2007ec681f3Smrgstruct intel_aux_map_context * 2017ec681f3Smrgintel_aux_map_init(void *driver_ctx, 2027ec681f3Smrg struct intel_mapped_pinned_buffer_alloc *buffer_alloc, 2037ec681f3Smrg const struct intel_device_info *devinfo) 2047ec681f3Smrg{ 2057ec681f3Smrg struct intel_aux_map_context *ctx; 2067ec681f3Smrg if (devinfo->ver < 12) 2077ec681f3Smrg return NULL; 2087ec681f3Smrg 2097ec681f3Smrg ctx = ralloc(NULL, struct intel_aux_map_context); 2107ec681f3Smrg if (!ctx) 2117ec681f3Smrg return NULL; 2127ec681f3Smrg 2137ec681f3Smrg if (pthread_mutex_init(&ctx->mutex, NULL)) 2147ec681f3Smrg return NULL; 2157ec681f3Smrg 2167ec681f3Smrg ctx->driver_ctx = driver_ctx; 2177ec681f3Smrg ctx->buffer_alloc = buffer_alloc; 2187ec681f3Smrg ctx->num_buffers = 0; 2197ec681f3Smrg list_inithead(&ctx->buffers); 2207ec681f3Smrg ctx->tail_offset = 0; 2217ec681f3Smrg ctx->tail_remaining = 0; 2227ec681f3Smrg ctx->state_num = 0; 2237ec681f3Smrg 2247ec681f3Smrg if (add_sub_table(ctx, 32 * 1024, 32 * 1024, &ctx->level3_base_addr, 2257ec681f3Smrg &ctx->level3_map)) { 2267ec681f3Smrg if (aux_map_debug) 2277ec681f3Smrg fprintf(stderr, "AUX-MAP L3: 0x%"PRIx64", map=%p\n", 2287ec681f3Smrg ctx->level3_base_addr, ctx->level3_map); 2297ec681f3Smrg p_atomic_inc(&ctx->state_num); 2307ec681f3Smrg return ctx; 2317ec681f3Smrg } else { 2327ec681f3Smrg ralloc_free(ctx); 2337ec681f3Smrg return NULL; 2347ec681f3Smrg } 2357ec681f3Smrg} 2367ec681f3Smrg 2377ec681f3Smrgvoid 2387ec681f3Smrgintel_aux_map_finish(struct intel_aux_map_context *ctx) 2397ec681f3Smrg{ 2407ec681f3Smrg if (!ctx) 2417ec681f3Smrg return; 2427ec681f3Smrg 2437ec681f3Smrg pthread_mutex_destroy(&ctx->mutex); 2447ec681f3Smrg list_for_each_entry_safe(struct aux_map_buffer, buf, &ctx->buffers, link) { 2457ec681f3Smrg ctx->buffer_alloc->free(ctx->driver_ctx, buf->buffer); 2467ec681f3Smrg list_del(&buf->link); 2477ec681f3Smrg p_atomic_dec(&ctx->num_buffers); 2487ec681f3Smrg ralloc_free(buf); 2497ec681f3Smrg } 2507ec681f3Smrg 2517ec681f3Smrg ralloc_free(ctx); 2527ec681f3Smrg} 2537ec681f3Smrg 2547ec681f3Smrguint64_t 2557ec681f3Smrgintel_aux_map_get_base(struct intel_aux_map_context *ctx) 2567ec681f3Smrg{ 2577ec681f3Smrg /** 2587ec681f3Smrg * This get initialized in intel_aux_map_init, and never changes, so there is 2597ec681f3Smrg * no need to lock the mutex. 2607ec681f3Smrg */ 2617ec681f3Smrg return ctx->level3_base_addr; 2627ec681f3Smrg} 2637ec681f3Smrg 2647ec681f3Smrgstatic struct aux_map_buffer * 2657ec681f3Smrgfind_buffer(struct intel_aux_map_context *ctx, uint64_t addr) 2667ec681f3Smrg{ 2677ec681f3Smrg list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) { 2687ec681f3Smrg if (buf->buffer->gpu <= addr && buf->buffer->gpu_end > addr) { 2697ec681f3Smrg return buf; 2707ec681f3Smrg } 2717ec681f3Smrg } 2727ec681f3Smrg return NULL; 2737ec681f3Smrg} 2747ec681f3Smrg 2757ec681f3Smrgstatic uint64_t * 2767ec681f3Smrgget_u64_entry_ptr(struct intel_aux_map_context *ctx, uint64_t addr) 2777ec681f3Smrg{ 2787ec681f3Smrg struct aux_map_buffer *buf = find_buffer(ctx, addr); 2797ec681f3Smrg assert(buf); 2807ec681f3Smrg uintptr_t map_offset = addr - buf->buffer->gpu; 2817ec681f3Smrg return (uint64_t*)((uint8_t*)buf->buffer->map + map_offset); 2827ec681f3Smrg} 2837ec681f3Smrg 2847ec681f3Smrgstatic uint8_t 2857ec681f3Smrgget_bpp_encoding(enum isl_format format) 2867ec681f3Smrg{ 2877ec681f3Smrg if (isl_format_is_yuv(format)) { 2887ec681f3Smrg switch (format) { 2897ec681f3Smrg case ISL_FORMAT_YCRCB_NORMAL: 2907ec681f3Smrg case ISL_FORMAT_YCRCB_SWAPY: 2917ec681f3Smrg case ISL_FORMAT_PLANAR_420_8: return 3; 2927ec681f3Smrg case ISL_FORMAT_PLANAR_420_12: return 2; 2937ec681f3Smrg case ISL_FORMAT_PLANAR_420_10: return 1; 2947ec681f3Smrg case ISL_FORMAT_PLANAR_420_16: return 0; 2957ec681f3Smrg default: 2967ec681f3Smrg unreachable("Unsupported format!"); 2977ec681f3Smrg return 0; 2987ec681f3Smrg } 2997ec681f3Smrg } else { 3007ec681f3Smrg switch (isl_format_get_layout(format)->bpb) { 3017ec681f3Smrg case 16: return 0; 3027ec681f3Smrg case 8: return 4; 3037ec681f3Smrg case 32: return 5; 3047ec681f3Smrg case 64: return 6; 3057ec681f3Smrg case 128: return 7; 3067ec681f3Smrg default: 3077ec681f3Smrg unreachable("Unsupported bpp!"); 3087ec681f3Smrg return 0; 3097ec681f3Smrg } 3107ec681f3Smrg } 3117ec681f3Smrg} 3127ec681f3Smrg 3137ec681f3Smrg#define INTEL_AUX_MAP_ENTRY_Y_TILED_BIT (0x1ull << 52) 3147ec681f3Smrg 3157ec681f3Smrguint64_t 3167ec681f3Smrgintel_aux_map_format_bits(enum isl_tiling tiling, enum isl_format format, 3177ec681f3Smrg uint8_t plane) 3187ec681f3Smrg{ 3197ec681f3Smrg if (aux_map_debug) 3207ec681f3Smrg fprintf(stderr, "AUX-MAP entry %s, bpp_enc=%d\n", 3217ec681f3Smrg isl_format_get_name(format), 3227ec681f3Smrg isl_format_get_aux_map_encoding(format)); 3237ec681f3Smrg 3247ec681f3Smrg assert(isl_tiling_is_any_y(tiling)); 3257ec681f3Smrg 3267ec681f3Smrg uint64_t format_bits = 3277ec681f3Smrg ((uint64_t)isl_format_get_aux_map_encoding(format) << 58) | 3287ec681f3Smrg ((uint64_t)(plane > 0) << 57) | 3297ec681f3Smrg ((uint64_t)get_bpp_encoding(format) << 54) | 3307ec681f3Smrg INTEL_AUX_MAP_ENTRY_Y_TILED_BIT; 3317ec681f3Smrg 3327ec681f3Smrg assert((format_bits & INTEL_AUX_MAP_FORMAT_BITS_MASK) == format_bits); 3337ec681f3Smrg 3347ec681f3Smrg return format_bits; 3357ec681f3Smrg} 3367ec681f3Smrg 3377ec681f3Smrguint64_t 3387ec681f3Smrgintel_aux_map_format_bits_for_isl_surf(const struct isl_surf *isl_surf) 3397ec681f3Smrg{ 3407ec681f3Smrg assert(!isl_format_is_planar(isl_surf->format)); 3417ec681f3Smrg return intel_aux_map_format_bits(isl_surf->tiling, isl_surf->format, 0); 3427ec681f3Smrg} 3437ec681f3Smrg 3447ec681f3Smrgstatic void 3457ec681f3Smrgget_aux_entry(struct intel_aux_map_context *ctx, uint64_t address, 3467ec681f3Smrg uint32_t *l1_index_out, uint64_t *l1_entry_addr_out, 3477ec681f3Smrg uint64_t **l1_entry_map_out) 3487ec681f3Smrg{ 3497ec681f3Smrg uint32_t l3_index = (address >> 36) & 0xfff; 3507ec681f3Smrg uint64_t *l3_entry = &ctx->level3_map[l3_index]; 3517ec681f3Smrg 3527ec681f3Smrg uint64_t *l2_map; 3537ec681f3Smrg if ((*l3_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 3547ec681f3Smrg uint64_t l2_gpu; 3557ec681f3Smrg if (add_sub_table(ctx, 32 * 1024, 32 * 1024, &l2_gpu, &l2_map)) { 3567ec681f3Smrg if (aux_map_debug) 3577ec681f3Smrg fprintf(stderr, "AUX-MAP L3[0x%x]: 0x%"PRIx64", map=%p\n", 3587ec681f3Smrg l3_index, l2_gpu, l2_map); 3597ec681f3Smrg } else { 3607ec681f3Smrg unreachable("Failed to add L2 Aux-Map Page Table!"); 3617ec681f3Smrg } 3627ec681f3Smrg *l3_entry = (l2_gpu & 0xffffffff8000ULL) | 1; 3637ec681f3Smrg } else { 3647ec681f3Smrg uint64_t l2_addr = intel_canonical_address(*l3_entry & ~0x7fffULL); 3657ec681f3Smrg l2_map = get_u64_entry_ptr(ctx, l2_addr); 3667ec681f3Smrg } 3677ec681f3Smrg uint32_t l2_index = (address >> 24) & 0xfff; 3687ec681f3Smrg uint64_t *l2_entry = &l2_map[l2_index]; 3697ec681f3Smrg 3707ec681f3Smrg uint64_t l1_addr, *l1_map; 3717ec681f3Smrg if ((*l2_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 3727ec681f3Smrg if (add_sub_table(ctx, 8 * 1024, 8 * 1024, &l1_addr, &l1_map)) { 3737ec681f3Smrg if (aux_map_debug) 3747ec681f3Smrg fprintf(stderr, "AUX-MAP L2[0x%x]: 0x%"PRIx64", map=%p\n", 3757ec681f3Smrg l2_index, l1_addr, l1_map); 3767ec681f3Smrg } else { 3777ec681f3Smrg unreachable("Failed to add L1 Aux-Map Page Table!"); 3787ec681f3Smrg } 3797ec681f3Smrg *l2_entry = (l1_addr & 0xffffffffe000ULL) | 1; 3807ec681f3Smrg } else { 3817ec681f3Smrg l1_addr = intel_canonical_address(*l2_entry & ~0x1fffULL); 3827ec681f3Smrg l1_map = get_u64_entry_ptr(ctx, l1_addr); 3837ec681f3Smrg } 3847ec681f3Smrg uint32_t l1_index = (address >> 16) & 0xff; 3857ec681f3Smrg if (l1_index_out) 3867ec681f3Smrg *l1_index_out = l1_index; 3877ec681f3Smrg if (l1_entry_addr_out) 3887ec681f3Smrg *l1_entry_addr_out = l1_addr + l1_index * sizeof(*l1_map); 3897ec681f3Smrg if (l1_entry_map_out) 3907ec681f3Smrg *l1_entry_map_out = &l1_map[l1_index]; 3917ec681f3Smrg} 3927ec681f3Smrg 3937ec681f3Smrgstatic void 3947ec681f3Smrgadd_mapping(struct intel_aux_map_context *ctx, uint64_t address, 3957ec681f3Smrg uint64_t aux_address, uint64_t format_bits, 3967ec681f3Smrg bool *state_changed) 3977ec681f3Smrg{ 3987ec681f3Smrg if (aux_map_debug) 3997ec681f3Smrg fprintf(stderr, "AUX-MAP 0x%"PRIx64" => 0x%"PRIx64"\n", address, 4007ec681f3Smrg aux_address); 4017ec681f3Smrg 4027ec681f3Smrg uint32_t l1_index; 4037ec681f3Smrg uint64_t *l1_entry; 4047ec681f3Smrg get_aux_entry(ctx, address, &l1_index, NULL, &l1_entry); 4057ec681f3Smrg 4067ec681f3Smrg const uint64_t l1_data = 4077ec681f3Smrg (aux_address & INTEL_AUX_MAP_ADDRESS_MASK) | 4087ec681f3Smrg format_bits | 4097ec681f3Smrg INTEL_AUX_MAP_ENTRY_VALID_BIT; 4107ec681f3Smrg 4117ec681f3Smrg const uint64_t current_l1_data = *l1_entry; 4127ec681f3Smrg if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 4137ec681f3Smrg assert((aux_address & 0xffULL) == 0); 4147ec681f3Smrg if (aux_map_debug) 4157ec681f3Smrg fprintf(stderr, "AUX-MAP L1[0x%x] 0x%"PRIx64" -> 0x%"PRIx64"\n", 4167ec681f3Smrg l1_index, current_l1_data, l1_data); 4177ec681f3Smrg /** 4187ec681f3Smrg * We use non-zero bits in 63:1 to indicate the entry had been filled 4197ec681f3Smrg * previously. If these bits are non-zero and they don't exactly match 4207ec681f3Smrg * what we want to program into the entry, then we must force the 4217ec681f3Smrg * aux-map tables to be flushed. 4227ec681f3Smrg */ 4237ec681f3Smrg if (current_l1_data != 0 && \ 4247ec681f3Smrg (current_l1_data | INTEL_AUX_MAP_ENTRY_VALID_BIT) != l1_data) 4257ec681f3Smrg *state_changed = true; 4267ec681f3Smrg *l1_entry = l1_data; 4277ec681f3Smrg } else { 4287ec681f3Smrg if (aux_map_debug) 4297ec681f3Smrg fprintf(stderr, "AUX-MAP L1[0x%x] is already marked valid!\n", 4307ec681f3Smrg l1_index); 4317ec681f3Smrg assert(*l1_entry == l1_data); 4327ec681f3Smrg } 4337ec681f3Smrg} 4347ec681f3Smrg 4357ec681f3Smrguint64_t * 4367ec681f3Smrgintel_aux_map_get_entry(struct intel_aux_map_context *ctx, 4377ec681f3Smrg uint64_t address, 4387ec681f3Smrg uint64_t *entry_address) 4397ec681f3Smrg{ 4407ec681f3Smrg pthread_mutex_lock(&ctx->mutex); 4417ec681f3Smrg uint64_t *l1_entry_map; 4427ec681f3Smrg get_aux_entry(ctx, address, NULL, entry_address, &l1_entry_map); 4437ec681f3Smrg pthread_mutex_unlock(&ctx->mutex); 4447ec681f3Smrg 4457ec681f3Smrg return l1_entry_map; 4467ec681f3Smrg} 4477ec681f3Smrg 4487ec681f3Smrgvoid 4497ec681f3Smrgintel_aux_map_add_mapping(struct intel_aux_map_context *ctx, uint64_t address, 4507ec681f3Smrg uint64_t aux_address, uint64_t main_size_B, 4517ec681f3Smrg uint64_t format_bits) 4527ec681f3Smrg{ 4537ec681f3Smrg bool state_changed = false; 4547ec681f3Smrg pthread_mutex_lock(&ctx->mutex); 4557ec681f3Smrg uint64_t map_addr = address; 4567ec681f3Smrg uint64_t dest_aux_addr = aux_address; 4577ec681f3Smrg assert(align64(address, INTEL_AUX_MAP_MAIN_PAGE_SIZE) == address); 4587ec681f3Smrg assert(align64(aux_address, INTEL_AUX_MAP_AUX_PAGE_SIZE) == aux_address); 4597ec681f3Smrg while (map_addr - address < main_size_B) { 4607ec681f3Smrg add_mapping(ctx, map_addr, dest_aux_addr, format_bits, &state_changed); 4617ec681f3Smrg map_addr += INTEL_AUX_MAP_MAIN_PAGE_SIZE; 4627ec681f3Smrg dest_aux_addr += INTEL_AUX_MAP_AUX_PAGE_SIZE; 4637ec681f3Smrg } 4647ec681f3Smrg pthread_mutex_unlock(&ctx->mutex); 4657ec681f3Smrg if (state_changed) 4667ec681f3Smrg p_atomic_inc(&ctx->state_num); 4677ec681f3Smrg} 4687ec681f3Smrg 4697ec681f3Smrg/** 4707ec681f3Smrg * We mark the leaf entry as invalid, but we don't attempt to cleanup the 4717ec681f3Smrg * other levels of translation mappings. Since we attempt to re-use VMA 4727ec681f3Smrg * ranges, hopefully this will not lead to unbounded growth of the translation 4737ec681f3Smrg * tables. 4747ec681f3Smrg */ 4757ec681f3Smrgstatic void 4767ec681f3Smrgremove_mapping(struct intel_aux_map_context *ctx, uint64_t address, 4777ec681f3Smrg bool *state_changed) 4787ec681f3Smrg{ 4797ec681f3Smrg uint32_t l3_index = (address >> 36) & 0xfff; 4807ec681f3Smrg uint64_t *l3_entry = &ctx->level3_map[l3_index]; 4817ec681f3Smrg 4827ec681f3Smrg uint64_t *l2_map; 4837ec681f3Smrg if ((*l3_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 4847ec681f3Smrg return; 4857ec681f3Smrg } else { 4867ec681f3Smrg uint64_t l2_addr = intel_canonical_address(*l3_entry & ~0x7fffULL); 4877ec681f3Smrg l2_map = get_u64_entry_ptr(ctx, l2_addr); 4887ec681f3Smrg } 4897ec681f3Smrg uint32_t l2_index = (address >> 24) & 0xfff; 4907ec681f3Smrg uint64_t *l2_entry = &l2_map[l2_index]; 4917ec681f3Smrg 4927ec681f3Smrg uint64_t *l1_map; 4937ec681f3Smrg if ((*l2_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 4947ec681f3Smrg return; 4957ec681f3Smrg } else { 4967ec681f3Smrg uint64_t l1_addr = intel_canonical_address(*l2_entry & ~0x1fffULL); 4977ec681f3Smrg l1_map = get_u64_entry_ptr(ctx, l1_addr); 4987ec681f3Smrg } 4997ec681f3Smrg uint32_t l1_index = (address >> 16) & 0xff; 5007ec681f3Smrg uint64_t *l1_entry = &l1_map[l1_index]; 5017ec681f3Smrg 5027ec681f3Smrg const uint64_t current_l1_data = *l1_entry; 5037ec681f3Smrg const uint64_t l1_data = current_l1_data & ~1ull; 5047ec681f3Smrg 5057ec681f3Smrg if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) { 5067ec681f3Smrg return; 5077ec681f3Smrg } else { 5087ec681f3Smrg if (aux_map_debug) 5097ec681f3Smrg fprintf(stderr, "AUX-MAP [0x%x][0x%x][0x%x] L1 entry removed!\n", 5107ec681f3Smrg l3_index, l2_index, l1_index); 5117ec681f3Smrg /** 5127ec681f3Smrg * We use non-zero bits in 63:1 to indicate the entry had been filled 5137ec681f3Smrg * previously. In the unlikely event that these are all zero, we force a 5147ec681f3Smrg * flush of the aux-map tables. 5157ec681f3Smrg */ 5167ec681f3Smrg if (unlikely(l1_data == 0)) 5177ec681f3Smrg *state_changed = true; 5187ec681f3Smrg *l1_entry = l1_data; 5197ec681f3Smrg } 5207ec681f3Smrg} 5217ec681f3Smrg 5227ec681f3Smrgvoid 5237ec681f3Smrgintel_aux_map_unmap_range(struct intel_aux_map_context *ctx, uint64_t address, 5247ec681f3Smrg uint64_t size) 5257ec681f3Smrg{ 5267ec681f3Smrg bool state_changed = false; 5277ec681f3Smrg pthread_mutex_lock(&ctx->mutex); 5287ec681f3Smrg if (aux_map_debug) 5297ec681f3Smrg fprintf(stderr, "AUX-MAP remove 0x%"PRIx64"-0x%"PRIx64"\n", address, 5307ec681f3Smrg address + size); 5317ec681f3Smrg 5327ec681f3Smrg uint64_t map_addr = address; 5337ec681f3Smrg assert(align64(address, INTEL_AUX_MAP_MAIN_PAGE_SIZE) == address); 5347ec681f3Smrg while (map_addr - address < size) { 5357ec681f3Smrg remove_mapping(ctx, map_addr, &state_changed); 5367ec681f3Smrg map_addr += 64 * 1024; 5377ec681f3Smrg } 5387ec681f3Smrg pthread_mutex_unlock(&ctx->mutex); 5397ec681f3Smrg if (state_changed) 5407ec681f3Smrg p_atomic_inc(&ctx->state_num); 5417ec681f3Smrg} 5427ec681f3Smrg 5437ec681f3Smrguint32_t 5447ec681f3Smrgintel_aux_map_get_num_buffers(struct intel_aux_map_context *ctx) 5457ec681f3Smrg{ 5467ec681f3Smrg return p_atomic_read(&ctx->num_buffers); 5477ec681f3Smrg} 5487ec681f3Smrg 5497ec681f3Smrgvoid 5507ec681f3Smrgintel_aux_map_fill_bos(struct intel_aux_map_context *ctx, void **driver_bos, 5517ec681f3Smrg uint32_t max_bos) 5527ec681f3Smrg{ 5537ec681f3Smrg assert(p_atomic_read(&ctx->num_buffers) >= max_bos); 5547ec681f3Smrg uint32_t i = 0; 5557ec681f3Smrg list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) { 5567ec681f3Smrg if (i >= max_bos) 5577ec681f3Smrg return; 5587ec681f3Smrg driver_bos[i++] = buf->buffer->driver_bo; 5597ec681f3Smrg } 5607ec681f3Smrg} 561