17ec681f3Smrg/*
27ec681f3Smrg * Copyright (c) 2018 Intel Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
/**
 * The aux map provides a multi-level lookup of the main surface address which
 * ends up providing information about the auxiliary surface data, including
 * the address where the auxiliary data resides.
 *
 * The 48-bit VMA (GPU) address of the main surface is split to do the address
 * lookup:
 *
 *  48 bit address of main surface
 * +--------+--------+--------+------+
 * | 47:36  | 35:24  | 23:16  | 15:0 |
 * | L3-idx | L2-idx | L1-idx | ...  |
 * +--------+--------+--------+------+
 *
 * The GFX_AUX_TABLE_BASE_ADDR points to a buffer. The L3 Table Entry is
 * located by indexing into this buffer as a uint64_t array using the L3-idx
 * value. The 64-bit L3 entry is defined as:
 *
 * +-------+-------------+------+---+
 * | 63:48 | 47:15       | 14:1 | 0 |
 * |  ...  | L2-tbl-addr | ...  | V |
 * +-------+-------------+------+---+
 *
 * If the `V` (valid) bit is set, then the L2-tbl-addr gives the address for
 * the level-2 table entries, with the lower address bits filled with zero.
 * The L2 Table Entry is located by indexing into this buffer as a uint64_t
 * array using the L2-idx value. The 64-bit L2 entry is similar to the L3
 * entry, except with 2 additional address bits:
 *
 * +-------+-------------+------+---+
 * | 63:48 | 47:13       | 12:1 | 0 |
 * |  ...  | L1-tbl-addr | ...  | V |
 * +-------+-------------+------+---+
 *
 * If the `V` bit is set, then the L1-tbl-addr gives the address for the
 * level-1 table entries, with the lower address bits filled with zero. The L1
 * Table Entry is located by indexing into this buffer as a uint64_t array
 * using the L1-idx value. The 64-bit L1 entry is defined as:
 *
 * +--------+------+-------+-------+-------+---------------+-----+---+
 * | 63:58  | 57   | 56:54 | 53:52 | 51:48 | 47:8          | 7:1 | 0 |
 * | Format | Y/Cr | Depth |  TM   |  ...  | aux-data-addr | ... | V |
 * +--------+------+-------+-------+-------+---------------+-----+---+
 *
 * Where:
 *  - Format: See `get_format_encoding`
 *  - Y/Cr: 0=Y(Luma), 1=Cr(Chroma)
 *  - (bit) Depth: See `get_bpp_encoding`
 *  - TM (Tile-mode): 0=Ys, 1=Y, 2=rsvd, 3=rsvd
 *  - aux-data-addr: VMA/GPU address for the aux-data
 *  - V: entry is valid
 */
767ec681f3Smrg
777ec681f3Smrg#include "intel_aux_map.h"
787ec681f3Smrg#include "intel_gem.h"
797ec681f3Smrg
807ec681f3Smrg#include "dev/intel_device_info.h"
817ec681f3Smrg#include "isl/isl.h"
827ec681f3Smrg
837ec681f3Smrg#include "drm-uapi/i915_drm.h"
847ec681f3Smrg#include "util/list.h"
857ec681f3Smrg#include "util/ralloc.h"
867ec681f3Smrg#include "util/u_atomic.h"
877ec681f3Smrg#include "main/macros.h"
887ec681f3Smrg
897ec681f3Smrg#include <inttypes.h>
907ec681f3Smrg#include <stdlib.h>
917ec681f3Smrg#include <stdio.h>
927ec681f3Smrg#include <pthread.h>
937ec681f3Smrg
/* Set to true to get verbose stderr logging of aux-map table updates. */
static const bool aux_map_debug = false;
957ec681f3Smrg
/* One pinned, CPU-mapped GPU buffer out of which page-table pages are
 * sub-allocated.
 */
struct aux_map_buffer {
   struct list_head link;       /* node in intel_aux_map_context::buffers */
   struct intel_buffer *buffer; /* driver-allocated pinned, mapped buffer */
};
1007ec681f3Smrg
struct intel_aux_map_context {
   void *driver_ctx;            /* opaque handle passed back to buffer_alloc */
   pthread_mutex_t mutex;       /* guards table walks/updates and buffer list */
   struct intel_mapped_pinned_buffer_alloc *buffer_alloc;
   uint32_t num_buffers;        /* count of entries in `buffers` (atomic) */
   struct list_head buffers;    /* aux_map_buffer list; tail is being filled */
   uint64_t level3_base_addr;   /* GPU address of the top-level (L3) table */
   uint64_t *level3_map;        /* CPU mapping of the L3 table */
   /* Sub-allocation cursor within the tail buffer. */
   uint32_t tail_offset, tail_remaining;
   /* Incremented (atomically) whenever entries change in a way that may
    * require the aux-map tables to be invalidated/flushed; see add_mapping.
    */
   uint32_t state_num;
};
1127ec681f3Smrg
1137ec681f3Smrgstatic bool
1147ec681f3Smrgadd_buffer(struct intel_aux_map_context *ctx)
1157ec681f3Smrg{
1167ec681f3Smrg   struct aux_map_buffer *buf = ralloc(ctx, struct aux_map_buffer);
1177ec681f3Smrg   if (!buf)
1187ec681f3Smrg      return false;
1197ec681f3Smrg
1207ec681f3Smrg   const uint32_t size = 0x100000;
1217ec681f3Smrg   buf->buffer = ctx->buffer_alloc->alloc(ctx->driver_ctx, size);
1227ec681f3Smrg   if (!buf->buffer) {
1237ec681f3Smrg      ralloc_free(buf);
1247ec681f3Smrg      return false;
1257ec681f3Smrg   }
1267ec681f3Smrg
1277ec681f3Smrg   assert(buf->buffer->map != NULL);
1287ec681f3Smrg
1297ec681f3Smrg   list_addtail(&buf->link, &ctx->buffers);
1307ec681f3Smrg   ctx->tail_offset = 0;
1317ec681f3Smrg   ctx->tail_remaining = size;
1327ec681f3Smrg   p_atomic_inc(&ctx->num_buffers);
1337ec681f3Smrg
1347ec681f3Smrg   return true;
1357ec681f3Smrg}
1367ec681f3Smrg
1377ec681f3Smrgstatic void
1387ec681f3Smrgadvance_current_pos(struct intel_aux_map_context *ctx, uint32_t size)
1397ec681f3Smrg{
1407ec681f3Smrg   assert(ctx->tail_remaining >= size);
1417ec681f3Smrg   ctx->tail_remaining -= size;
1427ec681f3Smrg   ctx->tail_offset += size;
1437ec681f3Smrg}
1447ec681f3Smrg
1457ec681f3Smrgstatic bool
1467ec681f3Smrgalign_and_verify_space(struct intel_aux_map_context *ctx, uint32_t size,
1477ec681f3Smrg                       uint32_t align)
1487ec681f3Smrg{
1497ec681f3Smrg   if (ctx->tail_remaining < size)
1507ec681f3Smrg      return false;
1517ec681f3Smrg
1527ec681f3Smrg   struct aux_map_buffer *tail =
1537ec681f3Smrg      list_last_entry(&ctx->buffers, struct aux_map_buffer, link);
1547ec681f3Smrg   uint64_t gpu = tail->buffer->gpu + ctx->tail_offset;
1557ec681f3Smrg   uint64_t aligned = align64(gpu, align);
1567ec681f3Smrg
1577ec681f3Smrg   if ((aligned - gpu) + size > ctx->tail_remaining) {
1587ec681f3Smrg      return false;
1597ec681f3Smrg   } else {
1607ec681f3Smrg      if (aligned - gpu > 0)
1617ec681f3Smrg         advance_current_pos(ctx, aligned - gpu);
1627ec681f3Smrg      return true;
1637ec681f3Smrg   }
1647ec681f3Smrg}
1657ec681f3Smrg
1667ec681f3Smrgstatic void
1677ec681f3Smrgget_current_pos(struct intel_aux_map_context *ctx, uint64_t *gpu, uint64_t **map)
1687ec681f3Smrg{
1697ec681f3Smrg   assert(!list_is_empty(&ctx->buffers));
1707ec681f3Smrg   struct aux_map_buffer *tail =
1717ec681f3Smrg      list_last_entry(&ctx->buffers, struct aux_map_buffer, link);
1727ec681f3Smrg   if (gpu)
1737ec681f3Smrg      *gpu = tail->buffer->gpu + ctx->tail_offset;
1747ec681f3Smrg   if (map)
1757ec681f3Smrg      *map = (uint64_t*)((uint8_t*)tail->buffer->map + ctx->tail_offset);
1767ec681f3Smrg}
1777ec681f3Smrg
1787ec681f3Smrgstatic bool
1797ec681f3Smrgadd_sub_table(struct intel_aux_map_context *ctx, uint32_t size,
1807ec681f3Smrg              uint32_t align, uint64_t *gpu, uint64_t **map)
1817ec681f3Smrg{
1827ec681f3Smrg   if (!align_and_verify_space(ctx, size, align)) {
1837ec681f3Smrg      if (!add_buffer(ctx))
1847ec681f3Smrg         return false;
1857ec681f3Smrg      UNUSED bool aligned = align_and_verify_space(ctx, size, align);
1867ec681f3Smrg      assert(aligned);
1877ec681f3Smrg   }
1887ec681f3Smrg   get_current_pos(ctx, gpu, map);
1897ec681f3Smrg   memset(*map, 0, size);
1907ec681f3Smrg   advance_current_pos(ctx, size);
1917ec681f3Smrg   return true;
1927ec681f3Smrg}
1937ec681f3Smrg
/* Return the current table-state generation counter (atomic read). */
uint32_t
intel_aux_map_get_state_num(struct intel_aux_map_context *ctx)
{
   return p_atomic_read(&ctx->state_num);
}
1997ec681f3Smrg
2007ec681f3Smrgstruct intel_aux_map_context *
2017ec681f3Smrgintel_aux_map_init(void *driver_ctx,
2027ec681f3Smrg                   struct intel_mapped_pinned_buffer_alloc *buffer_alloc,
2037ec681f3Smrg                   const struct intel_device_info *devinfo)
2047ec681f3Smrg{
2057ec681f3Smrg   struct intel_aux_map_context *ctx;
2067ec681f3Smrg   if (devinfo->ver < 12)
2077ec681f3Smrg      return NULL;
2087ec681f3Smrg
2097ec681f3Smrg   ctx = ralloc(NULL, struct intel_aux_map_context);
2107ec681f3Smrg   if (!ctx)
2117ec681f3Smrg      return NULL;
2127ec681f3Smrg
2137ec681f3Smrg   if (pthread_mutex_init(&ctx->mutex, NULL))
2147ec681f3Smrg      return NULL;
2157ec681f3Smrg
2167ec681f3Smrg   ctx->driver_ctx = driver_ctx;
2177ec681f3Smrg   ctx->buffer_alloc = buffer_alloc;
2187ec681f3Smrg   ctx->num_buffers = 0;
2197ec681f3Smrg   list_inithead(&ctx->buffers);
2207ec681f3Smrg   ctx->tail_offset = 0;
2217ec681f3Smrg   ctx->tail_remaining = 0;
2227ec681f3Smrg   ctx->state_num = 0;
2237ec681f3Smrg
2247ec681f3Smrg   if (add_sub_table(ctx, 32 * 1024, 32 * 1024, &ctx->level3_base_addr,
2257ec681f3Smrg                     &ctx->level3_map)) {
2267ec681f3Smrg      if (aux_map_debug)
2277ec681f3Smrg         fprintf(stderr, "AUX-MAP L3: 0x%"PRIx64", map=%p\n",
2287ec681f3Smrg                 ctx->level3_base_addr, ctx->level3_map);
2297ec681f3Smrg      p_atomic_inc(&ctx->state_num);
2307ec681f3Smrg      return ctx;
2317ec681f3Smrg   } else {
2327ec681f3Smrg      ralloc_free(ctx);
2337ec681f3Smrg      return NULL;
2347ec681f3Smrg   }
2357ec681f3Smrg}
2367ec681f3Smrg
2377ec681f3Smrgvoid
2387ec681f3Smrgintel_aux_map_finish(struct intel_aux_map_context *ctx)
2397ec681f3Smrg{
2407ec681f3Smrg   if (!ctx)
2417ec681f3Smrg      return;
2427ec681f3Smrg
2437ec681f3Smrg   pthread_mutex_destroy(&ctx->mutex);
2447ec681f3Smrg   list_for_each_entry_safe(struct aux_map_buffer, buf, &ctx->buffers, link) {
2457ec681f3Smrg      ctx->buffer_alloc->free(ctx->driver_ctx, buf->buffer);
2467ec681f3Smrg      list_del(&buf->link);
2477ec681f3Smrg      p_atomic_dec(&ctx->num_buffers);
2487ec681f3Smrg      ralloc_free(buf);
2497ec681f3Smrg   }
2507ec681f3Smrg
2517ec681f3Smrg   ralloc_free(ctx);
2527ec681f3Smrg}
2537ec681f3Smrg
/* Return the GPU address of the L3 table (the GFX_AUX_TABLE_BASE_ADDR). */
uint64_t
intel_aux_map_get_base(struct intel_aux_map_context *ctx)
{
   /**
    * This get initialized in intel_aux_map_init, and never changes, so there is
    * no need to lock the mutex.
    */
   return ctx->level3_base_addr;
}
2637ec681f3Smrg
2647ec681f3Smrgstatic struct aux_map_buffer *
2657ec681f3Smrgfind_buffer(struct intel_aux_map_context *ctx, uint64_t addr)
2667ec681f3Smrg{
2677ec681f3Smrg   list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) {
2687ec681f3Smrg      if (buf->buffer->gpu <= addr && buf->buffer->gpu_end > addr) {
2697ec681f3Smrg         return buf;
2707ec681f3Smrg      }
2717ec681f3Smrg   }
2727ec681f3Smrg   return NULL;
2737ec681f3Smrg}
2747ec681f3Smrg
2757ec681f3Smrgstatic uint64_t *
2767ec681f3Smrgget_u64_entry_ptr(struct intel_aux_map_context *ctx, uint64_t addr)
2777ec681f3Smrg{
2787ec681f3Smrg   struct aux_map_buffer *buf = find_buffer(ctx, addr);
2797ec681f3Smrg   assert(buf);
2807ec681f3Smrg   uintptr_t map_offset = addr - buf->buffer->gpu;
2817ec681f3Smrg   return (uint64_t*)((uint8_t*)buf->buffer->map + map_offset);
2827ec681f3Smrg}
2837ec681f3Smrg
2847ec681f3Smrgstatic uint8_t
2857ec681f3Smrgget_bpp_encoding(enum isl_format format)
2867ec681f3Smrg{
2877ec681f3Smrg   if (isl_format_is_yuv(format)) {
2887ec681f3Smrg      switch (format) {
2897ec681f3Smrg      case ISL_FORMAT_YCRCB_NORMAL:
2907ec681f3Smrg      case ISL_FORMAT_YCRCB_SWAPY:
2917ec681f3Smrg      case ISL_FORMAT_PLANAR_420_8: return 3;
2927ec681f3Smrg      case ISL_FORMAT_PLANAR_420_12: return 2;
2937ec681f3Smrg      case ISL_FORMAT_PLANAR_420_10: return 1;
2947ec681f3Smrg      case ISL_FORMAT_PLANAR_420_16: return 0;
2957ec681f3Smrg      default:
2967ec681f3Smrg         unreachable("Unsupported format!");
2977ec681f3Smrg         return 0;
2987ec681f3Smrg      }
2997ec681f3Smrg   } else {
3007ec681f3Smrg      switch (isl_format_get_layout(format)->bpb) {
3017ec681f3Smrg      case 16:  return 0;
3027ec681f3Smrg      case 8:   return 4;
3037ec681f3Smrg      case 32:  return 5;
3047ec681f3Smrg      case 64:  return 6;
3057ec681f3Smrg      case 128: return 7;
3067ec681f3Smrg      default:
3077ec681f3Smrg         unreachable("Unsupported bpp!");
3087ec681f3Smrg         return 0;
3097ec681f3Smrg      }
3107ec681f3Smrg   }
3117ec681f3Smrg}
3127ec681f3Smrg
3137ec681f3Smrg#define INTEL_AUX_MAP_ENTRY_Y_TILED_BIT  (0x1ull << 52)
3147ec681f3Smrg
3157ec681f3Smrguint64_t
3167ec681f3Smrgintel_aux_map_format_bits(enum isl_tiling tiling, enum isl_format format,
3177ec681f3Smrg                          uint8_t plane)
3187ec681f3Smrg{
3197ec681f3Smrg   if (aux_map_debug)
3207ec681f3Smrg      fprintf(stderr, "AUX-MAP entry %s, bpp_enc=%d\n",
3217ec681f3Smrg              isl_format_get_name(format),
3227ec681f3Smrg              isl_format_get_aux_map_encoding(format));
3237ec681f3Smrg
3247ec681f3Smrg   assert(isl_tiling_is_any_y(tiling));
3257ec681f3Smrg
3267ec681f3Smrg   uint64_t format_bits =
3277ec681f3Smrg      ((uint64_t)isl_format_get_aux_map_encoding(format) << 58) |
3287ec681f3Smrg      ((uint64_t)(plane > 0) << 57) |
3297ec681f3Smrg      ((uint64_t)get_bpp_encoding(format) << 54) |
3307ec681f3Smrg      INTEL_AUX_MAP_ENTRY_Y_TILED_BIT;
3317ec681f3Smrg
3327ec681f3Smrg   assert((format_bits & INTEL_AUX_MAP_FORMAT_BITS_MASK) == format_bits);
3337ec681f3Smrg
3347ec681f3Smrg   return format_bits;
3357ec681f3Smrg}
3367ec681f3Smrg
3377ec681f3Smrguint64_t
3387ec681f3Smrgintel_aux_map_format_bits_for_isl_surf(const struct isl_surf *isl_surf)
3397ec681f3Smrg{
3407ec681f3Smrg   assert(!isl_format_is_planar(isl_surf->format));
3417ec681f3Smrg   return intel_aux_map_format_bits(isl_surf->tiling, isl_surf->format, 0);
3427ec681f3Smrg}
3437ec681f3Smrg
/**
 * Walk (and lazily build) the L3 -> L2 -> L1 translation for `address`,
 * reporting the resulting L1 slot through the out-parameters (each may be
 * NULL if not wanted).
 *
 * Missing L2/L1 sub-tables are allocated on demand; a failed sub-table
 * allocation is treated as fatal (unreachable). Callers in this file hold
 * ctx->mutex around this walk.
 */
static void
get_aux_entry(struct intel_aux_map_context *ctx, uint64_t address,
              uint32_t *l1_index_out, uint64_t *l1_entry_addr_out,
              uint64_t **l1_entry_map_out)
{
   /* Bits 47:36 of the main-surface address index the L3 table. */
   uint32_t l3_index = (address >> 36) & 0xfff;
   uint64_t *l3_entry = &ctx->level3_map[l3_index];

   uint64_t *l2_map;
   if ((*l3_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      /* No L2 table yet: allocate a 32KB, 32KB-aligned sub-table and point
       * the L3 entry at it (address bits 47:15 plus the valid bit).
       */
      uint64_t l2_gpu;
      if (add_sub_table(ctx, 32 * 1024, 32 * 1024, &l2_gpu, &l2_map)) {
         if (aux_map_debug)
            fprintf(stderr, "AUX-MAP L3[0x%x]: 0x%"PRIx64", map=%p\n",
                    l3_index, l2_gpu, l2_map);
      } else {
         unreachable("Failed to add L2 Aux-Map Page Table!");
      }
      *l3_entry = (l2_gpu & 0xffffffff8000ULL) | 1;
   } else {
      /* Strip the low 15 non-address bits and sign-extend to a canonical
       * 48-bit GPU address before looking up the CPU mapping.
       */
      uint64_t l2_addr = intel_canonical_address(*l3_entry & ~0x7fffULL);
      l2_map = get_u64_entry_ptr(ctx, l2_addr);
   }
   /* Bits 35:24 index the L2 table. */
   uint32_t l2_index = (address >> 24) & 0xfff;
   uint64_t *l2_entry = &l2_map[l2_index];

   uint64_t l1_addr, *l1_map;
   if ((*l2_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      /* No L1 table yet: allocate an 8KB, 8KB-aligned sub-table (address
       * bits 47:13 plus the valid bit).
       */
      if (add_sub_table(ctx, 8 * 1024, 8 * 1024, &l1_addr, &l1_map)) {
         if (aux_map_debug)
            fprintf(stderr, "AUX-MAP L2[0x%x]: 0x%"PRIx64", map=%p\n",
                    l2_index, l1_addr, l1_map);
      } else {
         unreachable("Failed to add L1 Aux-Map Page Table!");
      }
      *l2_entry = (l1_addr & 0xffffffffe000ULL) | 1;
   } else {
      l1_addr = intel_canonical_address(*l2_entry & ~0x1fffULL);
      l1_map = get_u64_entry_ptr(ctx, l1_addr);
   }
   /* Bits 23:16 index the L1 table. */
   uint32_t l1_index = (address >> 16) & 0xff;
   if (l1_index_out)
      *l1_index_out = l1_index;
   if (l1_entry_addr_out)
      *l1_entry_addr_out = l1_addr + l1_index * sizeof(*l1_map);
   if (l1_entry_map_out)
      *l1_entry_map_out = &l1_map[l1_index];
}
3927ec681f3Smrg
/**
 * Program the L1 entry mapping one 64KB main-surface page at `address` to
 * its aux data at `aux_address`, combined with the precomputed format_bits.
 *
 * Sets *state_changed to true when a previously-used (now-invalid) entry is
 * being reprogrammed to different contents, which forces the caller to bump
 * the state counter. Caller holds ctx->mutex.
 */
static void
add_mapping(struct intel_aux_map_context *ctx, uint64_t address,
            uint64_t aux_address, uint64_t format_bits,
            bool *state_changed)
{
   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP 0x%"PRIx64" => 0x%"PRIx64"\n", address,
              aux_address);

   uint32_t l1_index;
   uint64_t *l1_entry;
   get_aux_entry(ctx, address, &l1_index, NULL, &l1_entry);

   /* New entry contents: aux address bits, format bits, valid bit. */
   const uint64_t l1_data =
      (aux_address & INTEL_AUX_MAP_ADDRESS_MASK) |
      format_bits |
      INTEL_AUX_MAP_ENTRY_VALID_BIT;

   const uint64_t current_l1_data = *l1_entry;
   if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      /* Aux data addresses must be 256B-aligned (bits 7:0 are not stored). */
      assert((aux_address & 0xffULL) == 0);
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L1[0x%x] 0x%"PRIx64" -> 0x%"PRIx64"\n",
                 l1_index, current_l1_data, l1_data);
      /**
       * We use non-zero bits in 63:1 to indicate the entry had been filled
       * previously. If these bits are non-zero and they don't exactly match
       * what we want to program into the entry, then we must force the
       * aux-map tables to be flushed.
       */
      if (current_l1_data != 0 && \
          (current_l1_data | INTEL_AUX_MAP_ENTRY_VALID_BIT) != l1_data)
         *state_changed = true;
      *l1_entry = l1_data;
   } else {
      /* Entry already valid: it must already hold exactly what we'd write. */
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L1[0x%x] is already marked valid!\n",
                 l1_index);
      assert(*l1_entry == l1_data);
   }
}
4347ec681f3Smrg
4357ec681f3Smrguint64_t *
4367ec681f3Smrgintel_aux_map_get_entry(struct intel_aux_map_context *ctx,
4377ec681f3Smrg                        uint64_t address,
4387ec681f3Smrg                        uint64_t *entry_address)
4397ec681f3Smrg{
4407ec681f3Smrg   pthread_mutex_lock(&ctx->mutex);
4417ec681f3Smrg   uint64_t *l1_entry_map;
4427ec681f3Smrg   get_aux_entry(ctx, address, NULL, entry_address, &l1_entry_map);
4437ec681f3Smrg   pthread_mutex_unlock(&ctx->mutex);
4447ec681f3Smrg
4457ec681f3Smrg   return l1_entry_map;
4467ec681f3Smrg}
4477ec681f3Smrg
4487ec681f3Smrgvoid
4497ec681f3Smrgintel_aux_map_add_mapping(struct intel_aux_map_context *ctx, uint64_t address,
4507ec681f3Smrg                          uint64_t aux_address, uint64_t main_size_B,
4517ec681f3Smrg                          uint64_t format_bits)
4527ec681f3Smrg{
4537ec681f3Smrg   bool state_changed = false;
4547ec681f3Smrg   pthread_mutex_lock(&ctx->mutex);
4557ec681f3Smrg   uint64_t map_addr = address;
4567ec681f3Smrg   uint64_t dest_aux_addr = aux_address;
4577ec681f3Smrg   assert(align64(address, INTEL_AUX_MAP_MAIN_PAGE_SIZE) == address);
4587ec681f3Smrg   assert(align64(aux_address, INTEL_AUX_MAP_AUX_PAGE_SIZE) == aux_address);
4597ec681f3Smrg   while (map_addr - address < main_size_B) {
4607ec681f3Smrg      add_mapping(ctx, map_addr, dest_aux_addr, format_bits, &state_changed);
4617ec681f3Smrg      map_addr += INTEL_AUX_MAP_MAIN_PAGE_SIZE;
4627ec681f3Smrg      dest_aux_addr += INTEL_AUX_MAP_AUX_PAGE_SIZE;
4637ec681f3Smrg   }
4647ec681f3Smrg   pthread_mutex_unlock(&ctx->mutex);
4657ec681f3Smrg   if (state_changed)
4667ec681f3Smrg      p_atomic_inc(&ctx->state_num);
4677ec681f3Smrg}
4687ec681f3Smrg
/**
 * We mark the leaf entry as invalid, but we don't attempt to cleanup the
 * other levels of translation mappings. Since we attempt to re-use VMA
 * ranges, hopefully this will not lead to unbounded growth of the translation
 * tables.
 */
static void
remove_mapping(struct intel_aux_map_context *ctx, uint64_t address,
               bool *state_changed)
{
   /* Bits 47:36 index the L3 table. */
   uint32_t l3_index = (address >> 36) & 0xfff;
   uint64_t *l3_entry = &ctx->level3_map[l3_index];

   uint64_t *l2_map;
   if ((*l3_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      /* No L2 table for this range; nothing was ever mapped here. */
      return;
   } else {
      uint64_t l2_addr = intel_canonical_address(*l3_entry & ~0x7fffULL);
      l2_map = get_u64_entry_ptr(ctx, l2_addr);
   }
   /* Bits 35:24 index the L2 table. */
   uint32_t l2_index = (address >> 24) & 0xfff;
   uint64_t *l2_entry = &l2_map[l2_index];

   uint64_t *l1_map;
   if ((*l2_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      /* No L1 table for this range; nothing was ever mapped here. */
      return;
   } else {
      uint64_t l1_addr = intel_canonical_address(*l2_entry & ~0x1fffULL);
      l1_map = get_u64_entry_ptr(ctx, l1_addr);
   }
   /* Bits 23:16 index the L1 table. */
   uint32_t l1_index = (address >> 16) & 0xff;
   uint64_t *l1_entry = &l1_map[l1_index];

   const uint64_t current_l1_data = *l1_entry;
   /* Clearing only the valid bit keeps bits 63:1 as a record of the old
    * contents; add_mapping uses them to detect re-use with different data.
    */
   const uint64_t l1_data = current_l1_data & ~1ull;

   if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      /* Already invalid; nothing to remove. */
      return;
   } else {
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP [0x%x][0x%x][0x%x] L1 entry removed!\n",
                 l3_index, l2_index, l1_index);
      /**
       * We use non-zero bits in 63:1 to indicate the entry had been filled
       * previously. In the unlikely event that these are all zero, we force a
       * flush of the aux-map tables.
       */
      if (unlikely(l1_data == 0))
         *state_changed = true;
      *l1_entry = l1_data;
   }
}
5217ec681f3Smrg
5227ec681f3Smrgvoid
5237ec681f3Smrgintel_aux_map_unmap_range(struct intel_aux_map_context *ctx, uint64_t address,
5247ec681f3Smrg                          uint64_t size)
5257ec681f3Smrg{
5267ec681f3Smrg   bool state_changed = false;
5277ec681f3Smrg   pthread_mutex_lock(&ctx->mutex);
5287ec681f3Smrg   if (aux_map_debug)
5297ec681f3Smrg      fprintf(stderr, "AUX-MAP remove 0x%"PRIx64"-0x%"PRIx64"\n", address,
5307ec681f3Smrg              address + size);
5317ec681f3Smrg
5327ec681f3Smrg   uint64_t map_addr = address;
5337ec681f3Smrg   assert(align64(address, INTEL_AUX_MAP_MAIN_PAGE_SIZE) == address);
5347ec681f3Smrg   while (map_addr - address < size) {
5357ec681f3Smrg      remove_mapping(ctx, map_addr, &state_changed);
5367ec681f3Smrg      map_addr += 64 * 1024;
5377ec681f3Smrg   }
5387ec681f3Smrg   pthread_mutex_unlock(&ctx->mutex);
5397ec681f3Smrg   if (state_changed)
5407ec681f3Smrg      p_atomic_inc(&ctx->state_num);
5417ec681f3Smrg}
5427ec681f3Smrg
/* Return the number of backing table buffers (atomic read). */
uint32_t
intel_aux_map_get_num_buffers(struct intel_aux_map_context *ctx)
{
   return p_atomic_read(&ctx->num_buffers);
}
5487ec681f3Smrg
5497ec681f3Smrgvoid
5507ec681f3Smrgintel_aux_map_fill_bos(struct intel_aux_map_context *ctx, void **driver_bos,
5517ec681f3Smrg                       uint32_t max_bos)
5527ec681f3Smrg{
5537ec681f3Smrg   assert(p_atomic_read(&ctx->num_buffers) >= max_bos);
5547ec681f3Smrg   uint32_t i = 0;
5557ec681f3Smrg   list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) {
5567ec681f3Smrg      if (i >= max_bos)
5577ec681f3Smrg         return;
5587ec681f3Smrg      driver_bos[i++] = buf->buffer->driver_bo;
5597ec681f3Smrg   }
5607ec681f3Smrg}
561