17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2019 Intel Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#ifndef MI_BUILDER_H
257ec681f3Smrg#define MI_BUILDER_H
267ec681f3Smrg
277ec681f3Smrg#include "dev/intel_device_info.h"
287ec681f3Smrg#include "genxml/genX_bits.h"
297ec681f3Smrg#include "util/bitscan.h"
307ec681f3Smrg#include "util/fast_idiv_by_const.h"
317ec681f3Smrg#include "util/u_math.h"
327ec681f3Smrg
#if !defined(MI_BUILDER_NUM_ALLOC_GPRS)
/** Number of GPRs the MI builder's allocator is allowed to hand out.
 *
 * A user of this API may define this itself (before this point) so that the
 * GPRs at the top end stay reserved for the user's own purposes.  When the
 * user does not define it, the builder may allocate 16 GPRs.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 16
#endif
417ec681f3Smrg
427ec681f3Smrg/** These must be defined by the user of the builder
437ec681f3Smrg *
447ec681f3Smrg * void *__gen_get_batch_dwords(__gen_user_data *user_data,
457ec681f3Smrg *                              unsigned num_dwords);
467ec681f3Smrg *
477ec681f3Smrg * __gen_address_type
487ec681f3Smrg * __gen_address_offset(__gen_address_type addr, uint64_t offset);
497ec681f3Smrg *
507ec681f3Smrg *
 * If self-modifying batches are supported, we must be able to pass batch
 * addresses around as void*s, so pinning as well as batch chaining or some
 * other mechanism for ensuring batch pointers remain valid during building is
 * required. The following function must also be defined; it returns an
 * address in canonical form:
567ec681f3Smrg *
577ec681f3Smrg * __gen_address_type
587ec681f3Smrg * __gen_get_batch_address(__gen_user_data *user_data, void *location);
597ec681f3Smrg *
607ec681f3Smrg * Also, __gen_combine_address must accept a location value of NULL and return
617ec681f3Smrg * a fully valid 64-bit address.
627ec681f3Smrg */
637ec681f3Smrg
647ec681f3Smrg/*
657ec681f3Smrg * Start of the actual MI builder
667ec681f3Smrg */
677ec681f3Smrg
/* Token-pasting helpers producing the <cmd>_length, <cmd>_header and
 * <cmd>_pack names for a command.  In the code below they are only reached
 * through mi_builder_pack() and mi_builder_emit(), so `cmd` has already been
 * macro-expanded by the time the suffix is pasted on.
 */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

/** Fill in and pack one command into storage the caller already obtained.
 *
 * Used as a statement prefix:
 *
 *    mi_builder_pack(b, CMD, dst, name) { name.SomeField = ...; }
 *
 * This expands to a for loop whose body runs once when dst is non-NULL (and
 * not at all otherwise).  The init clause declares `struct CMD name`
 * initialized from CMD_header; after the body has edited it, the increment
 * clause calls CMD_pack((b)->user_data, dst, &name) and clears the internal
 * _dst pointer, which terminates the loop.
 */
#define mi_builder_pack(b, cmd, dst, name)                                   \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                      \
        *_dst = (struct cmd *)(dst);                                         \
        __builtin_expect(_dst != NULL, 1);                                   \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),         \
        _dst = NULL)

/** Like mi_builder_pack(), but first requests <cmd>_length dwords from
 * __gen_get_batch_dwords() and packs the command into them.
 */
#define mi_builder_emit(b, cmd, name)                                        \
   mi_builder_pack((b), cmd,                                                 \
                   __gen_get_batch_dwords((b)->user_data,                    \
                                          __genxml_cmd_length(cmd)),         \
                   name)
807ec681f3Smrg
817ec681f3Smrg
/** The kinds of operand a struct mi_value can describe. */
enum mi_value_type {
   /* An immediate constant, stored in mi_value::imm. */
   MI_VALUE_TYPE_IMM,
   /* A 32-bit value in memory at mi_value::addr. */
   MI_VALUE_TYPE_MEM32,
   /* A 64-bit value in memory at mi_value::addr. */
   MI_VALUE_TYPE_MEM64,
   /* A 32-bit register at offset mi_value::reg. */
   MI_VALUE_TYPE_REG32,
   /* A 64-bit register whose low half is at offset mi_value::reg. */
   MI_VALUE_TYPE_REG64,
};
897ec681f3Smrg
907ec681f3Smrgstruct mi_value {
917ec681f3Smrg   enum mi_value_type type;
927ec681f3Smrg
937ec681f3Smrg   union {
947ec681f3Smrg      uint64_t imm;
957ec681f3Smrg      __gen_address_type addr;
967ec681f3Smrg      uint32_t reg;
977ec681f3Smrg   };
987ec681f3Smrg
997ec681f3Smrg#if GFX_VERx10 >= 75
1007ec681f3Smrg   bool invert;
1017ec681f3Smrg#endif
1027ec681f3Smrg};
1037ec681f3Smrg
/** A register offset in the form the MI commands are programmed with; see
 * mi_adjust_reg_num() for how it is derived.
 */
struct mi_reg_num {
   /* Register offset to place in the command. */
   uint32_t num;
#if GFX_VER >= 11
   /* True when `num` was rebased by 0x2000; it is then programmed into the
    * command's AddCSMMIOStartOffset* field.
    */
   bool cs;
#endif
};
1107ec681f3Smrg
1117ec681f3Smrgstatic inline struct mi_reg_num
1127ec681f3Smrgmi_adjust_reg_num(uint32_t reg)
1137ec681f3Smrg{
1147ec681f3Smrg#if GFX_VER >= 11
1157ec681f3Smrg   bool cs = reg >= 0x2000 && reg < 0x4000;
1167ec681f3Smrg   return (struct mi_reg_num) {
1177ec681f3Smrg      .num = reg - (cs ? 0x2000 : 0),
1187ec681f3Smrg      .cs = cs,
1197ec681f3Smrg   };
1207ec681f3Smrg#else
1217ec681f3Smrg   return (struct mi_reg_num) { .num = reg, };
1227ec681f3Smrg#endif
1237ec681f3Smrg}
1247ec681f3Smrg
/* Capacity, in dwords, of the builder's buffer of pending MI_MATH ALU
 * instructions (mi_builder::math_dwords).
 */
#if GFX_VER < 9
#define MI_BUILDER_MAX_MATH_DWORDS 64
#else
#define MI_BUILDER_MAX_MATH_DWORDS 256
#endif
1307ec681f3Smrg
1317ec681f3Smrgstruct mi_builder {
1327ec681f3Smrg   const struct intel_device_info *devinfo;
1337ec681f3Smrg   __gen_user_data *user_data;
1347ec681f3Smrg
1357ec681f3Smrg#if GFX_VERx10 >= 75
1367ec681f3Smrg   uint32_t gprs;
1377ec681f3Smrg   uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];
1387ec681f3Smrg
1397ec681f3Smrg   unsigned num_math_dwords;
1407ec681f3Smrg   uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];
1417ec681f3Smrg#endif
1427ec681f3Smrg};
1437ec681f3Smrg
1447ec681f3Smrgstatic inline void
1457ec681f3Smrgmi_builder_init(struct mi_builder *b,
1467ec681f3Smrg                const struct intel_device_info *devinfo,
1477ec681f3Smrg                __gen_user_data *user_data)
1487ec681f3Smrg{
1497ec681f3Smrg   memset(b, 0, sizeof(*b));
1507ec681f3Smrg   b->devinfo = devinfo;
1517ec681f3Smrg   b->user_data = user_data;
1527ec681f3Smrg
1537ec681f3Smrg#if GFX_VERx10 >= 75
1547ec681f3Smrg   b->gprs = 0;
1557ec681f3Smrg   b->num_math_dwords = 0;
1567ec681f3Smrg#endif
1577ec681f3Smrg}
1587ec681f3Smrg
/** Emit all queued ALU instructions as a single MI_MATH command.
 *
 * Does nothing when the queue is empty, and compiles to an empty function
 * when GFX_VERx10 < 75.
 */
static inline void
mi_builder_flush_math(struct mi_builder *b)
{
#if GFX_VERx10 >= 75
   const unsigned num_alu_dwords = b->num_math_dwords;

   if (num_alu_dwords > 0) {
      /* One header dword followed by the queued ALU dwords. */
      uint32_t *cmd = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                         1 + num_alu_dwords);
      mi_builder_pack(b, GENX(MI_MATH), cmd, mi_math) {
         mi_math.DWordLength = 1 + num_alu_dwords - GENX(MI_MATH_length_bias);
      }
      memcpy(cmd + 1, b->math_dwords, num_alu_dwords * sizeof(uint32_t));

      b->num_math_dwords = 0;
   }
#endif
}
1757ec681f3Smrg
/* Register offset of the first GPR; GPR n sits at base + n * 8. */
#define _MI_BUILDER_GPR_BASE 0x2600
/* How many GPRs the hardware actually provides. */
#define _MI_BUILDER_NUM_HW_GPRS 16
1797ec681f3Smrg
1807ec681f3Smrg#if GFX_VERx10 >= 75
1817ec681f3Smrg
1827ec681f3Smrgstatic inline bool
1837ec681f3Smrgmi_value_is_reg(struct mi_value val)
1847ec681f3Smrg{
1857ec681f3Smrg   return val.type == MI_VALUE_TYPE_REG32 ||
1867ec681f3Smrg          val.type == MI_VALUE_TYPE_REG64;
1877ec681f3Smrg}
1887ec681f3Smrg
1897ec681f3Smrgstatic inline bool
1907ec681f3Smrgmi_value_is_gpr(struct mi_value val)
1917ec681f3Smrg{
1927ec681f3Smrg   return mi_value_is_reg(val) &&
1937ec681f3Smrg          val.reg >= _MI_BUILDER_GPR_BASE &&
1947ec681f3Smrg          val.reg < _MI_BUILDER_GPR_BASE +
1957ec681f3Smrg                    _MI_BUILDER_NUM_HW_GPRS * 8;
1967ec681f3Smrg}
1977ec681f3Smrg
1987ec681f3Smrgstatic inline bool
1997ec681f3Smrg_mi_value_is_allocated_gpr(struct mi_value val)
2007ec681f3Smrg{
2017ec681f3Smrg   return mi_value_is_reg(val) &&
2027ec681f3Smrg          val.reg >= _MI_BUILDER_GPR_BASE &&
2037ec681f3Smrg          val.reg < _MI_BUILDER_GPR_BASE +
2047ec681f3Smrg                    MI_BUILDER_NUM_ALLOC_GPRS * 8;
2057ec681f3Smrg}
2067ec681f3Smrg
2077ec681f3Smrgstatic inline uint32_t
2087ec681f3Smrg_mi_value_as_gpr(struct mi_value val)
2097ec681f3Smrg{
2107ec681f3Smrg   assert(mi_value_is_gpr(val));
2117ec681f3Smrg   assert(val.reg % 8 == 0);
2127ec681f3Smrg   return (val.reg - _MI_BUILDER_GPR_BASE) / 8;
2137ec681f3Smrg}
2147ec681f3Smrg
2157ec681f3Smrgstatic inline struct mi_value
2167ec681f3Smrgmi_new_gpr(struct mi_builder *b)
2177ec681f3Smrg{
2187ec681f3Smrg   unsigned gpr = ffs(~b->gprs) - 1;
2197ec681f3Smrg   assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
2207ec681f3Smrg   assert(b->gpr_refs[gpr] == 0);
2217ec681f3Smrg   b->gprs |= (1u << gpr);
2227ec681f3Smrg   b->gpr_refs[gpr] = 1;
2237ec681f3Smrg
2247ec681f3Smrg   return (struct mi_value) {
2257ec681f3Smrg      .type = MI_VALUE_TYPE_REG64,
2267ec681f3Smrg      .reg = _MI_BUILDER_GPR_BASE + gpr * 8,
2277ec681f3Smrg   };
2287ec681f3Smrg}
2297ec681f3Smrg#endif /* GFX_VERx10 >= 75 */
2307ec681f3Smrg
2317ec681f3Smrg/** Take a reference to a mi_value
2327ec681f3Smrg *
2337ec681f3Smrg * The MI builder uses reference counting to automatically free ALU GPRs for
2347ec681f3Smrg * re-use in calculations.  All mi_* math functions consume the reference
2357ec681f3Smrg * they are handed for each source and return a reference to a value which the
2367ec681f3Smrg * caller must consume.  In particular, if you pas the same value into a
2377ec681f3Smrg * single mi_* math function twice (say to add a number to itself), you
2387ec681f3Smrg * are responsible for calling mi_value_ref() to get a second reference
2397ec681f3Smrg * because the mi_* math function will consume it twice.
2407ec681f3Smrg */
2417ec681f3Smrgstatic inline struct mi_value
2427ec681f3Smrgmi_value_ref(struct mi_builder *b, struct mi_value val)
2437ec681f3Smrg{
2447ec681f3Smrg#if GFX_VERx10 >= 75
2457ec681f3Smrg   if (_mi_value_is_allocated_gpr(val)) {
2467ec681f3Smrg      unsigned gpr = _mi_value_as_gpr(val);
2477ec681f3Smrg      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
2487ec681f3Smrg      assert(b->gprs & (1u << gpr));
2497ec681f3Smrg      assert(b->gpr_refs[gpr] < UINT8_MAX);
2507ec681f3Smrg      b->gpr_refs[gpr]++;
2517ec681f3Smrg   }
2527ec681f3Smrg#endif /* GFX_VERx10 >= 75 */
2537ec681f3Smrg
2547ec681f3Smrg   return val;
2557ec681f3Smrg}
2567ec681f3Smrg
2577ec681f3Smrg/** Drop a reference to a mi_value
2587ec681f3Smrg *
2597ec681f3Smrg * See also mi_value_ref.
2607ec681f3Smrg */
2617ec681f3Smrgstatic inline void
2627ec681f3Smrgmi_value_unref(struct mi_builder *b, struct mi_value val)
2637ec681f3Smrg{
2647ec681f3Smrg#if GFX_VERx10 >= 75
2657ec681f3Smrg   if (_mi_value_is_allocated_gpr(val)) {
2667ec681f3Smrg      unsigned gpr = _mi_value_as_gpr(val);
2677ec681f3Smrg      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
2687ec681f3Smrg      assert(b->gprs & (1u << gpr));
2697ec681f3Smrg      assert(b->gpr_refs[gpr] > 0);
2707ec681f3Smrg      if (--b->gpr_refs[gpr] == 0)
2717ec681f3Smrg         b->gprs &= ~(1u << gpr);
2727ec681f3Smrg   }
2737ec681f3Smrg#endif /* GFX_VERx10 >= 75 */
2747ec681f3Smrg}
2757ec681f3Smrg
2767ec681f3Smrgstatic inline struct mi_value
2777ec681f3Smrgmi_imm(uint64_t imm)
2787ec681f3Smrg{
2797ec681f3Smrg   return (struct mi_value) {
2807ec681f3Smrg      .type = MI_VALUE_TYPE_IMM,
2817ec681f3Smrg      .imm = imm,
2827ec681f3Smrg   };
2837ec681f3Smrg}
2847ec681f3Smrg
2857ec681f3Smrgstatic inline struct mi_value
2867ec681f3Smrgmi_reg32(uint32_t reg)
2877ec681f3Smrg{
2887ec681f3Smrg   struct mi_value val = {
2897ec681f3Smrg      .type = MI_VALUE_TYPE_REG32,
2907ec681f3Smrg      .reg = reg,
2917ec681f3Smrg   };
2927ec681f3Smrg#if GFX_VERx10 >= 75
2937ec681f3Smrg   assert(!_mi_value_is_allocated_gpr(val));
2947ec681f3Smrg#endif
2957ec681f3Smrg   return val;
2967ec681f3Smrg}
2977ec681f3Smrg
2987ec681f3Smrgstatic inline struct mi_value
2997ec681f3Smrgmi_reg64(uint32_t reg)
3007ec681f3Smrg{
3017ec681f3Smrg   struct mi_value val = {
3027ec681f3Smrg      .type = MI_VALUE_TYPE_REG64,
3037ec681f3Smrg      .reg = reg,
3047ec681f3Smrg   };
3057ec681f3Smrg#if GFX_VERx10 >= 75
3067ec681f3Smrg   assert(!_mi_value_is_allocated_gpr(val));
3077ec681f3Smrg#endif
3087ec681f3Smrg   return val;
3097ec681f3Smrg}
3107ec681f3Smrg
3117ec681f3Smrgstatic inline struct mi_value
3127ec681f3Smrgmi_mem32(__gen_address_type addr)
3137ec681f3Smrg{
3147ec681f3Smrg   return (struct mi_value) {
3157ec681f3Smrg      .type = MI_VALUE_TYPE_MEM32,
3167ec681f3Smrg      .addr = addr,
3177ec681f3Smrg   };
3187ec681f3Smrg}
3197ec681f3Smrg
3207ec681f3Smrgstatic inline struct mi_value
3217ec681f3Smrgmi_mem64(__gen_address_type addr)
3227ec681f3Smrg{
3237ec681f3Smrg   return (struct mi_value) {
3247ec681f3Smrg      .type = MI_VALUE_TYPE_MEM64,
3257ec681f3Smrg      .addr = addr,
3267ec681f3Smrg   };
3277ec681f3Smrg}
3287ec681f3Smrg
3297ec681f3Smrgstatic inline struct mi_value
3307ec681f3Smrgmi_value_half(struct mi_value value, bool top_32_bits)
3317ec681f3Smrg{
3327ec681f3Smrg   switch (value.type) {
3337ec681f3Smrg   case MI_VALUE_TYPE_IMM:
3347ec681f3Smrg      if (top_32_bits)
3357ec681f3Smrg         value.imm >>= 32;
3367ec681f3Smrg      else
3377ec681f3Smrg         value.imm &= 0xffffffffu;
3387ec681f3Smrg      return value;
3397ec681f3Smrg
3407ec681f3Smrg   case MI_VALUE_TYPE_MEM32:
3417ec681f3Smrg      assert(!top_32_bits);
3427ec681f3Smrg      return value;
3437ec681f3Smrg
3447ec681f3Smrg   case MI_VALUE_TYPE_MEM64:
3457ec681f3Smrg      if (top_32_bits)
3467ec681f3Smrg         value.addr = __gen_address_offset(value.addr, 4);
3477ec681f3Smrg      value.type = MI_VALUE_TYPE_MEM32;
3487ec681f3Smrg      return value;
3497ec681f3Smrg
3507ec681f3Smrg   case MI_VALUE_TYPE_REG32:
3517ec681f3Smrg      assert(!top_32_bits);
3527ec681f3Smrg      return value;
3537ec681f3Smrg
3547ec681f3Smrg   case MI_VALUE_TYPE_REG64:
3557ec681f3Smrg      if (top_32_bits)
3567ec681f3Smrg         value.reg += 4;
3577ec681f3Smrg      value.type = MI_VALUE_TYPE_REG32;
3587ec681f3Smrg      return value;
3597ec681f3Smrg   }
3607ec681f3Smrg
3617ec681f3Smrg   unreachable("Invalid mi_value type");
3627ec681f3Smrg}
3637ec681f3Smrg
3647ec681f3Smrgstatic inline void
3657ec681f3Smrg_mi_copy_no_unref(struct mi_builder *b,
3667ec681f3Smrg                  struct mi_value dst, struct mi_value src)
3677ec681f3Smrg{
3687ec681f3Smrg#if GFX_VERx10 >= 75
3697ec681f3Smrg   /* TODO: We could handle src.invert by emitting a bit of math if we really
3707ec681f3Smrg    * wanted to.
3717ec681f3Smrg    */
3727ec681f3Smrg   assert(!dst.invert && !src.invert);
3737ec681f3Smrg#endif
3747ec681f3Smrg   mi_builder_flush_math(b);
3757ec681f3Smrg
3767ec681f3Smrg   switch (dst.type) {
3777ec681f3Smrg   case MI_VALUE_TYPE_IMM:
3787ec681f3Smrg      unreachable("Cannot copy to an immediate");
3797ec681f3Smrg
3807ec681f3Smrg   case MI_VALUE_TYPE_MEM64:
3817ec681f3Smrg   case MI_VALUE_TYPE_REG64:
3827ec681f3Smrg      switch (src.type) {
3837ec681f3Smrg      case MI_VALUE_TYPE_IMM:
3847ec681f3Smrg         if (dst.type == MI_VALUE_TYPE_REG64) {
3857ec681f3Smrg            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
3867ec681f3Smrg                                                              GENX(MI_LOAD_REGISTER_IMM_length) + 2);
3877ec681f3Smrg            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
3887ec681f3Smrg            mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {
3897ec681f3Smrg               lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -
3907ec681f3Smrg                                 GENX(MI_LOAD_REGISTER_IMM_length_bias);
3917ec681f3Smrg#if GFX_VER >= 11
3927ec681f3Smrg               lri.AddCSMMIOStartOffset = reg.cs;
3937ec681f3Smrg#endif
3947ec681f3Smrg            }
3957ec681f3Smrg            dw[1] = reg.num;
3967ec681f3Smrg            dw[2] = src.imm;
3977ec681f3Smrg            dw[3] = reg.num + 4;
3987ec681f3Smrg            dw[4] = src.imm >> 32;
3997ec681f3Smrg         } else {
4007ec681f3Smrg#if GFX_VER >= 8
4017ec681f3Smrg            assert(dst.type == MI_VALUE_TYPE_MEM64);
4027ec681f3Smrg            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
4037ec681f3Smrg                                                              GENX(MI_STORE_DATA_IMM_length) + 1);
4047ec681f3Smrg            mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {
4057ec681f3Smrg               sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -
4067ec681f3Smrg                                 GENX(MI_STORE_DATA_IMM_length_bias);
4077ec681f3Smrg               sdm.StoreQword = true;
4087ec681f3Smrg               sdm.Address = dst.addr;
4097ec681f3Smrg            }
4107ec681f3Smrg            dw[3] = src.imm;
4117ec681f3Smrg            dw[4] = src.imm >> 32;
4127ec681f3Smrg#else
4137ec681f3Smrg         _mi_copy_no_unref(b, mi_value_half(dst, false),
4147ec681f3Smrg                              mi_value_half(src, false));
4157ec681f3Smrg         _mi_copy_no_unref(b, mi_value_half(dst, true),
4167ec681f3Smrg                              mi_value_half(src, true));
4177ec681f3Smrg#endif
4187ec681f3Smrg         }
4197ec681f3Smrg         break;
4207ec681f3Smrg      case MI_VALUE_TYPE_REG32:
4217ec681f3Smrg      case MI_VALUE_TYPE_MEM32:
4227ec681f3Smrg         _mi_copy_no_unref(b, mi_value_half(dst, false),
4237ec681f3Smrg                              mi_value_half(src, false));
4247ec681f3Smrg         _mi_copy_no_unref(b, mi_value_half(dst, true),
4257ec681f3Smrg                              mi_imm(0));
4267ec681f3Smrg         break;
4277ec681f3Smrg      case MI_VALUE_TYPE_REG64:
4287ec681f3Smrg      case MI_VALUE_TYPE_MEM64:
4297ec681f3Smrg         _mi_copy_no_unref(b, mi_value_half(dst, false),
4307ec681f3Smrg                              mi_value_half(src, false));
4317ec681f3Smrg         _mi_copy_no_unref(b, mi_value_half(dst, true),
4327ec681f3Smrg                              mi_value_half(src, true));
4337ec681f3Smrg         break;
4347ec681f3Smrg      default:
4357ec681f3Smrg         unreachable("Invalid mi_value type");
4367ec681f3Smrg      }
4377ec681f3Smrg      break;
4387ec681f3Smrg
4397ec681f3Smrg   case MI_VALUE_TYPE_MEM32:
4407ec681f3Smrg      switch (src.type) {
4417ec681f3Smrg      case MI_VALUE_TYPE_IMM:
4427ec681f3Smrg         mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
4437ec681f3Smrg            sdi.Address = dst.addr;
4447ec681f3Smrg#if GFX_VER >= 12
4457ec681f3Smrg            sdi.ForceWriteCompletionCheck = true;
4467ec681f3Smrg#endif
4477ec681f3Smrg            sdi.ImmediateData = src.imm;
4487ec681f3Smrg         }
4497ec681f3Smrg         break;
4507ec681f3Smrg
4517ec681f3Smrg      case MI_VALUE_TYPE_MEM32:
4527ec681f3Smrg      case MI_VALUE_TYPE_MEM64:
4537ec681f3Smrg#if GFX_VER >= 8
4547ec681f3Smrg         mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
4557ec681f3Smrg            cmm.DestinationMemoryAddress = dst.addr;
4567ec681f3Smrg            cmm.SourceMemoryAddress = src.addr;
4577ec681f3Smrg         }
4587ec681f3Smrg#elif GFX_VERx10 == 75
4597ec681f3Smrg         {
4607ec681f3Smrg            struct mi_value tmp = mi_new_gpr(b);
4617ec681f3Smrg            _mi_copy_no_unref(b, tmp, src);
4627ec681f3Smrg            _mi_copy_no_unref(b, dst, tmp);
4637ec681f3Smrg            mi_value_unref(b, tmp);
4647ec681f3Smrg         }
4657ec681f3Smrg#else
4667ec681f3Smrg         unreachable("Cannot do mem <-> mem copy on IVB and earlier");
4677ec681f3Smrg#endif
4687ec681f3Smrg         break;
4697ec681f3Smrg
4707ec681f3Smrg      case MI_VALUE_TYPE_REG32:
4717ec681f3Smrg      case MI_VALUE_TYPE_REG64:
4727ec681f3Smrg         mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
4737ec681f3Smrg            struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
4747ec681f3Smrg            srm.RegisterAddress = reg.num;
4757ec681f3Smrg#if GFX_VER >= 11
4767ec681f3Smrg            srm.AddCSMMIOStartOffset = reg.cs;
4777ec681f3Smrg#endif
4787ec681f3Smrg            srm.MemoryAddress = dst.addr;
4797ec681f3Smrg         }
4807ec681f3Smrg         break;
4817ec681f3Smrg
4827ec681f3Smrg      default:
4837ec681f3Smrg         unreachable("Invalid mi_value type");
4847ec681f3Smrg      }
4857ec681f3Smrg      break;
4867ec681f3Smrg
4877ec681f3Smrg   case MI_VALUE_TYPE_REG32:
4887ec681f3Smrg      switch (src.type) {
4897ec681f3Smrg      case MI_VALUE_TYPE_IMM:
4907ec681f3Smrg         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
4917ec681f3Smrg            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
4927ec681f3Smrg            lri.RegisterOffset = reg.num;
4937ec681f3Smrg#if GFX_VER >= 11
4947ec681f3Smrg            lri.AddCSMMIOStartOffset = reg.cs;
4957ec681f3Smrg#endif
4967ec681f3Smrg            lri.DataDWord = src.imm;
4977ec681f3Smrg         }
4987ec681f3Smrg         break;
4997ec681f3Smrg
5007ec681f3Smrg      case MI_VALUE_TYPE_MEM32:
5017ec681f3Smrg      case MI_VALUE_TYPE_MEM64:
5027ec681f3Smrg#if GFX_VER >= 7
5037ec681f3Smrg         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
5047ec681f3Smrg            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
5057ec681f3Smrg            lrm.RegisterAddress = reg.num;
5067ec681f3Smrg#if GFX_VER >= 11
5077ec681f3Smrg            lrm.AddCSMMIOStartOffset = reg.cs;
5087ec681f3Smrg#endif
5097ec681f3Smrg            lrm.MemoryAddress = src.addr;
5107ec681f3Smrg         }
5117ec681f3Smrg#else
5127ec681f3Smrg         unreachable("Cannot load do mem -> reg copy on SNB and earlier");
5137ec681f3Smrg#endif
5147ec681f3Smrg         break;
5157ec681f3Smrg
5167ec681f3Smrg      case MI_VALUE_TYPE_REG32:
5177ec681f3Smrg      case MI_VALUE_TYPE_REG64:
5187ec681f3Smrg#if GFX_VERx10 >= 75
5197ec681f3Smrg         if (src.reg != dst.reg) {
5207ec681f3Smrg            mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
5217ec681f3Smrg               struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
5227ec681f3Smrg               lrr.SourceRegisterAddress = reg.num;
5237ec681f3Smrg#if GFX_VER >= 11
5247ec681f3Smrg               lrr.AddCSMMIOStartOffsetSource = reg.cs;
5257ec681f3Smrg#endif
5267ec681f3Smrg               reg = mi_adjust_reg_num(dst.reg);
5277ec681f3Smrg               lrr.DestinationRegisterAddress = reg.num;
5287ec681f3Smrg#if GFX_VER >= 11
5297ec681f3Smrg               lrr.AddCSMMIOStartOffsetDestination = reg.cs;
5307ec681f3Smrg#endif
5317ec681f3Smrg            }
5327ec681f3Smrg         }
5337ec681f3Smrg#else
5347ec681f3Smrg         unreachable("Cannot do reg <-> reg copy on IVB and earlier");
5357ec681f3Smrg#endif
5367ec681f3Smrg         break;
5377ec681f3Smrg
5387ec681f3Smrg      default:
5397ec681f3Smrg         unreachable("Invalid mi_value type");
5407ec681f3Smrg      }
5417ec681f3Smrg      break;
5427ec681f3Smrg
5437ec681f3Smrg   default:
5447ec681f3Smrg      unreachable("Invalid mi_value type");
5457ec681f3Smrg   }
5467ec681f3Smrg}
5477ec681f3Smrg
#if GFX_VERx10 >= 75
/* Forward declaration: mi_store() below calls this, but the definition
 * appears later in the file, after the MI_MATH helpers it is built on.
 */
static inline struct mi_value mi_resolve_invert(struct mi_builder *b,
                                                struct mi_value src);
#endif
5527ec681f3Smrg
5537ec681f3Smrg/** Store the value in src to the value represented by dst
5547ec681f3Smrg *
5557ec681f3Smrg * If the bit size of src and dst mismatch, this function does an unsigned
5567ec681f3Smrg * integer cast.  If src has more bits than dst, it takes the bottom bits.  If
5577ec681f3Smrg * src has fewer bits then dst, it fills the top bits with zeros.
5587ec681f3Smrg *
5597ec681f3Smrg * This function consumes one reference for each of src and dst.
5607ec681f3Smrg */
5617ec681f3Smrgstatic inline void
5627ec681f3Smrgmi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)
5637ec681f3Smrg{
5647ec681f3Smrg#if GFX_VERx10 >= 75
5657ec681f3Smrg   src = mi_resolve_invert(b, src);
5667ec681f3Smrg#endif
5677ec681f3Smrg   _mi_copy_no_unref(b, dst, src);
5687ec681f3Smrg   mi_value_unref(b, src);
5697ec681f3Smrg   mi_value_unref(b, dst);
5707ec681f3Smrg}
5717ec681f3Smrg
5727ec681f3Smrgstatic inline void
5737ec681f3Smrgmi_memset(struct mi_builder *b, __gen_address_type dst,
5747ec681f3Smrg          uint32_t value, uint32_t size)
5757ec681f3Smrg{
5767ec681f3Smrg#if GFX_VERx10 >= 75
5777ec681f3Smrg   assert(b->num_math_dwords == 0);
5787ec681f3Smrg#endif
5797ec681f3Smrg
5807ec681f3Smrg   /* This memset operates in units of dwords. */
5817ec681f3Smrg   assert(size % 4 == 0);
5827ec681f3Smrg
5837ec681f3Smrg   for (uint32_t i = 0; i < size; i += 4) {
5847ec681f3Smrg      mi_store(b, mi_mem32(__gen_address_offset(dst, i)),
5857ec681f3Smrg                      mi_imm(value));
5867ec681f3Smrg   }
5877ec681f3Smrg}
5887ec681f3Smrg
5897ec681f3Smrg/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */
5907ec681f3Smrgstatic inline void
5917ec681f3Smrgmi_memcpy(struct mi_builder *b, __gen_address_type dst,
5927ec681f3Smrg          __gen_address_type src, uint32_t size)
5937ec681f3Smrg{
5947ec681f3Smrg#if GFX_VERx10 >= 75
5957ec681f3Smrg   assert(b->num_math_dwords == 0);
5967ec681f3Smrg#endif
5977ec681f3Smrg
5987ec681f3Smrg   /* This memcpy operates in units of dwords. */
5997ec681f3Smrg   assert(size % 4 == 0);
6007ec681f3Smrg
6017ec681f3Smrg   for (uint32_t i = 0; i < size; i += 4) {
6027ec681f3Smrg      struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));
6037ec681f3Smrg      struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));
6047ec681f3Smrg#if GFX_VERx10 >= 75
6057ec681f3Smrg      mi_store(b, dst_val, src_val);
6067ec681f3Smrg#else
6077ec681f3Smrg      /* IVB does not have a general purpose register for command streamer
6087ec681f3Smrg       * commands. Therefore, we use an alternate temporary register.
6097ec681f3Smrg       */
6107ec681f3Smrg      struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */
6117ec681f3Smrg      mi_store(b, tmp_reg, src_val);
6127ec681f3Smrg      mi_store(b, dst_val, tmp_reg);
6137ec681f3Smrg#endif
6147ec681f3Smrg   }
6157ec681f3Smrg}
6167ec681f3Smrg
6177ec681f3Smrg/*
6187ec681f3Smrg * MI_MATH Section.  Only available on Haswell+
6197ec681f3Smrg */
6207ec681f3Smrg
6217ec681f3Smrg#if GFX_VERx10 >= 75
6227ec681f3Smrg
6237ec681f3Smrg/**
6247ec681f3Smrg * Perform a predicated store (assuming the condition is already loaded
6257ec681f3Smrg * in the MI_PREDICATE_RESULT register) of the value in src to the memory
6267ec681f3Smrg * location specified by dst.  Non-memory destinations are not supported.
6277ec681f3Smrg *
6287ec681f3Smrg * This function consumes one reference for each of src and dst.
6297ec681f3Smrg */
6307ec681f3Smrgstatic inline void
6317ec681f3Smrgmi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
6327ec681f3Smrg{
6337ec681f3Smrg   assert(!dst.invert && !src.invert);
6347ec681f3Smrg
6357ec681f3Smrg   mi_builder_flush_math(b);
6367ec681f3Smrg
6377ec681f3Smrg   /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
6387ec681f3Smrg    * destination to be memory, and resolve the source to a temporary
6397ec681f3Smrg    * register if it isn't in one already.
6407ec681f3Smrg    */
6417ec681f3Smrg   assert(dst.type == MI_VALUE_TYPE_MEM64 ||
6427ec681f3Smrg          dst.type == MI_VALUE_TYPE_MEM32);
6437ec681f3Smrg
6447ec681f3Smrg   if (src.type != MI_VALUE_TYPE_REG32 &&
6457ec681f3Smrg       src.type != MI_VALUE_TYPE_REG64) {
6467ec681f3Smrg      struct mi_value tmp = mi_new_gpr(b);
6477ec681f3Smrg      _mi_copy_no_unref(b, tmp, src);
6487ec681f3Smrg      src = tmp;
6497ec681f3Smrg   }
6507ec681f3Smrg
6517ec681f3Smrg   if (dst.type == MI_VALUE_TYPE_MEM64) {
6527ec681f3Smrg      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
6537ec681f3Smrg         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
6547ec681f3Smrg         srm.RegisterAddress = reg.num;
6557ec681f3Smrg#if GFX_VER >= 11
6567ec681f3Smrg         srm.AddCSMMIOStartOffset = reg.cs;
6577ec681f3Smrg#endif
6587ec681f3Smrg         srm.MemoryAddress = dst.addr;
6597ec681f3Smrg         srm.PredicateEnable = true;
6607ec681f3Smrg      }
6617ec681f3Smrg      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
6627ec681f3Smrg         struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);
6637ec681f3Smrg         srm.RegisterAddress = reg.num;
6647ec681f3Smrg#if GFX_VER >= 11
6657ec681f3Smrg         srm.AddCSMMIOStartOffset = reg.cs;
6667ec681f3Smrg#endif
6677ec681f3Smrg         srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
6687ec681f3Smrg         srm.PredicateEnable = true;
6697ec681f3Smrg      }
6707ec681f3Smrg   } else {
6717ec681f3Smrg      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
6727ec681f3Smrg         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
6737ec681f3Smrg         srm.RegisterAddress = reg.num;
6747ec681f3Smrg#if GFX_VER >= 11
6757ec681f3Smrg         srm.AddCSMMIOStartOffset = reg.cs;
6767ec681f3Smrg#endif
6777ec681f3Smrg         srm.MemoryAddress = dst.addr;
6787ec681f3Smrg         srm.PredicateEnable = true;
6797ec681f3Smrg      }
6807ec681f3Smrg   }
6817ec681f3Smrg
6827ec681f3Smrg   mi_value_unref(b, src);
6837ec681f3Smrg   mi_value_unref(b, dst);
6847ec681f3Smrg}
6857ec681f3Smrg
6867ec681f3Smrgstatic inline void
6877ec681f3Smrg_mi_builder_push_math(struct mi_builder *b,
6887ec681f3Smrg                      const uint32_t *dwords,
6897ec681f3Smrg                      unsigned num_dwords)
6907ec681f3Smrg{
6917ec681f3Smrg   assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);
6927ec681f3Smrg   if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)
6937ec681f3Smrg      mi_builder_flush_math(b);
6947ec681f3Smrg
6957ec681f3Smrg   memcpy(&b->math_dwords[b->num_math_dwords],
6967ec681f3Smrg          dwords, num_dwords * sizeof(*dwords));
6977ec681f3Smrg   b->num_math_dwords += num_dwords;
6987ec681f3Smrg}
6997ec681f3Smrg
7007ec681f3Smrgstatic inline uint32_t
7017ec681f3Smrg_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
7027ec681f3Smrg{
7037ec681f3Smrg   struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
7047ec681f3Smrg      .Operand2 = operand2,
7057ec681f3Smrg      .Operand1 = operand1,
7067ec681f3Smrg      .ALUOpcode = opcode,
7077ec681f3Smrg   };
7087ec681f3Smrg
7097ec681f3Smrg   uint32_t dw;
7107ec681f3Smrg   GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);
7117ec681f3Smrg
7127ec681f3Smrg   return dw;
7137ec681f3Smrg}
7147ec681f3Smrg
7157ec681f3Smrgstatic inline struct mi_value
7167ec681f3Smrgmi_value_to_gpr(struct mi_builder *b, struct mi_value val)
7177ec681f3Smrg{
7187ec681f3Smrg   if (mi_value_is_gpr(val))
7197ec681f3Smrg      return val;
7207ec681f3Smrg
7217ec681f3Smrg   /* Save off the invert flag because it makes copy() grumpy */
7227ec681f3Smrg   bool invert = val.invert;
7237ec681f3Smrg   val.invert = false;
7247ec681f3Smrg
7257ec681f3Smrg   struct mi_value tmp = mi_new_gpr(b);
7267ec681f3Smrg   _mi_copy_no_unref(b, tmp, val);
7277ec681f3Smrg   tmp.invert = invert;
7287ec681f3Smrg
7297ec681f3Smrg   return tmp;
7307ec681f3Smrg}
7317ec681f3Smrg
7327ec681f3Smrgstatic inline uint64_t
7337ec681f3Smrgmi_value_to_u64(struct mi_value val)
7347ec681f3Smrg{
7357ec681f3Smrg   assert(val.type == MI_VALUE_TYPE_IMM);
7367ec681f3Smrg   return val.invert ? ~val.imm : val.imm;
7377ec681f3Smrg}
7387ec681f3Smrg
7397ec681f3Smrgstatic inline uint32_t
7407ec681f3Smrg_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)
7417ec681f3Smrg{
7427ec681f3Smrg   if (val->type == MI_VALUE_TYPE_IMM &&
7437ec681f3Smrg       (val->imm == 0 || val->imm == UINT64_MAX)) {
7447ec681f3Smrg      uint64_t imm = val->invert ? ~val->imm : val->imm;
7457ec681f3Smrg      return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
7467ec681f3Smrg   } else {
7477ec681f3Smrg      *val = mi_value_to_gpr(b, *val);
7487ec681f3Smrg      return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
7497ec681f3Smrg                          src, _mi_value_as_gpr(*val));
7507ec681f3Smrg   }
7517ec681f3Smrg}
7527ec681f3Smrg
7537ec681f3Smrgstatic inline struct mi_value
7547ec681f3Smrgmi_math_binop(struct mi_builder *b, uint32_t opcode,
7557ec681f3Smrg              struct mi_value src0, struct mi_value src1,
7567ec681f3Smrg              uint32_t store_op, uint32_t store_src)
7577ec681f3Smrg{
7587ec681f3Smrg   struct mi_value dst = mi_new_gpr(b);
7597ec681f3Smrg
7607ec681f3Smrg   uint32_t dw[4];
7617ec681f3Smrg   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);
7627ec681f3Smrg   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);
7637ec681f3Smrg   dw[2] = _mi_pack_alu(opcode, 0, 0);
7647ec681f3Smrg   dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);
7657ec681f3Smrg   _mi_builder_push_math(b, dw, 4);
7667ec681f3Smrg
7677ec681f3Smrg   mi_value_unref(b, src0);
7687ec681f3Smrg   mi_value_unref(b, src1);
7697ec681f3Smrg
7707ec681f3Smrg   return dst;
7717ec681f3Smrg}
7727ec681f3Smrg
7737ec681f3Smrgstatic inline struct mi_value
7747ec681f3Smrgmi_inot(struct mi_builder *b, struct mi_value val)
7757ec681f3Smrg{
7767ec681f3Smrg   if (val.type == MI_VALUE_TYPE_IMM)
7777ec681f3Smrg      return mi_imm(~mi_value_to_u64(val));
7787ec681f3Smrg
7797ec681f3Smrg   val.invert = !val.invert;
7807ec681f3Smrg   return val;
7817ec681f3Smrg}
7827ec681f3Smrg
7837ec681f3Smrgstatic inline struct mi_value
7847ec681f3Smrgmi_resolve_invert(struct mi_builder *b, struct mi_value src)
7857ec681f3Smrg{
7867ec681f3Smrg   if (!src.invert)
7877ec681f3Smrg      return src;
7887ec681f3Smrg
7897ec681f3Smrg   assert(src.type != MI_VALUE_TYPE_IMM);
7907ec681f3Smrg   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
7917ec681f3Smrg                           MI_ALU_STORE, MI_ALU_ACCU);
7927ec681f3Smrg}
7937ec681f3Smrg
7947ec681f3Smrgstatic inline struct mi_value
7957ec681f3Smrgmi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
7967ec681f3Smrg{
7977ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
7987ec681f3Smrg      return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));
7997ec681f3Smrg
8007ec681f3Smrg   return mi_math_binop(b, MI_ALU_ADD, src0, src1,
8017ec681f3Smrg                           MI_ALU_STORE, MI_ALU_ACCU);
8027ec681f3Smrg}
8037ec681f3Smrg
8047ec681f3Smrgstatic inline struct mi_value
8057ec681f3Smrgmi_iadd_imm(struct mi_builder *b,
8067ec681f3Smrg                struct mi_value src, uint64_t N)
8077ec681f3Smrg{
8087ec681f3Smrg   if (N == 0)
8097ec681f3Smrg      return src;
8107ec681f3Smrg
8117ec681f3Smrg   return mi_iadd(b, src, mi_imm(N));
8127ec681f3Smrg}
8137ec681f3Smrg
8147ec681f3Smrgstatic inline struct mi_value
8157ec681f3Smrgmi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
8167ec681f3Smrg{
8177ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
8187ec681f3Smrg      return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));
8197ec681f3Smrg
8207ec681f3Smrg   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
8217ec681f3Smrg                           MI_ALU_STORE, MI_ALU_ACCU);
8227ec681f3Smrg}
8237ec681f3Smrg
8247ec681f3Smrgstatic inline struct mi_value
8257ec681f3Smrgmi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
8267ec681f3Smrg{
8277ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
8287ec681f3Smrg      return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);
8297ec681f3Smrg
8307ec681f3Smrg   /* Compute "equal" by subtracting and storing the zero bit */
8317ec681f3Smrg   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
8327ec681f3Smrg                            MI_ALU_STORE, MI_ALU_ZF);
8337ec681f3Smrg}
8347ec681f3Smrg
8357ec681f3Smrgstatic inline struct mi_value
8367ec681f3Smrgmi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
8377ec681f3Smrg{
8387ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
8397ec681f3Smrg      return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);
8407ec681f3Smrg
8417ec681f3Smrg   /* Compute "not equal" by subtracting and storing the inverse zero bit */
8427ec681f3Smrg   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
8437ec681f3Smrg                            MI_ALU_STOREINV, MI_ALU_ZF);
8447ec681f3Smrg}
8457ec681f3Smrg
8467ec681f3Smrgstatic inline struct mi_value
8477ec681f3Smrgmi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
8487ec681f3Smrg{
8497ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
8507ec681f3Smrg      return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);
8517ec681f3Smrg
8527ec681f3Smrg   /* Compute "less than" by subtracting and storing the carry bit */
8537ec681f3Smrg   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
8547ec681f3Smrg                           MI_ALU_STORE, MI_ALU_CF);
8557ec681f3Smrg}
8567ec681f3Smrg
8577ec681f3Smrgstatic inline struct mi_value
8587ec681f3Smrgmi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
8597ec681f3Smrg{
8607ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
8617ec681f3Smrg      return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);
8627ec681f3Smrg
8637ec681f3Smrg   /* Compute "less than" by subtracting and storing the carry bit */
8647ec681f3Smrg   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
8657ec681f3Smrg                           MI_ALU_STOREINV, MI_ALU_CF);
8667ec681f3Smrg}
8677ec681f3Smrg
8687ec681f3Smrgstatic inline struct mi_value
8697ec681f3Smrgmi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
8707ec681f3Smrg{
8717ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
8727ec681f3Smrg      return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));
8737ec681f3Smrg
8747ec681f3Smrg   return mi_math_binop(b, MI_ALU_AND, src0, src1,
8757ec681f3Smrg                           MI_ALU_STORE, MI_ALU_ACCU);
8767ec681f3Smrg}
8777ec681f3Smrg
8787ec681f3Smrgstatic inline struct mi_value
8797ec681f3Smrgmi_nz(struct mi_builder *b, struct mi_value src)
8807ec681f3Smrg{
8817ec681f3Smrg   if (src.type == MI_VALUE_TYPE_IMM)
8827ec681f3Smrg      return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);
8837ec681f3Smrg
8847ec681f3Smrg   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
8857ec681f3Smrg                           MI_ALU_STOREINV, MI_ALU_ZF);
8867ec681f3Smrg}
8877ec681f3Smrg
8887ec681f3Smrgstatic inline struct mi_value
8897ec681f3Smrgmi_z(struct mi_builder *b, struct mi_value src)
8907ec681f3Smrg{
8917ec681f3Smrg   if (src.type == MI_VALUE_TYPE_IMM)
8927ec681f3Smrg      return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);
8937ec681f3Smrg
8947ec681f3Smrg   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
8957ec681f3Smrg                           MI_ALU_STORE, MI_ALU_ZF);
8967ec681f3Smrg}
8977ec681f3Smrg
8987ec681f3Smrgstatic inline struct mi_value
8997ec681f3Smrgmi_ior(struct mi_builder *b,
9007ec681f3Smrg       struct mi_value src0, struct mi_value src1)
9017ec681f3Smrg{
9027ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
9037ec681f3Smrg      return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));
9047ec681f3Smrg
9057ec681f3Smrg   return mi_math_binop(b, MI_ALU_OR, src0, src1,
9067ec681f3Smrg                           MI_ALU_STORE, MI_ALU_ACCU);
9077ec681f3Smrg}
9087ec681f3Smrg
9097ec681f3Smrg#if GFX_VERx10 >= 125
9107ec681f3Smrgstatic inline struct mi_value
9117ec681f3Smrgmi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
9127ec681f3Smrg{
9137ec681f3Smrg   if (src1.type == MI_VALUE_TYPE_IMM) {
9147ec681f3Smrg      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
9157ec681f3Smrg      assert(mi_value_to_u64(src1) <= 32);
9167ec681f3Smrg   }
9177ec681f3Smrg
9187ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
9197ec681f3Smrg      return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));
9207ec681f3Smrg
9217ec681f3Smrg   return mi_math_binop(b, MI_ALU_SHL, src0, src1,
9227ec681f3Smrg                           MI_ALU_STORE, MI_ALU_ACCU);
9237ec681f3Smrg}
9247ec681f3Smrg
9257ec681f3Smrgstatic inline struct mi_value
9267ec681f3Smrgmi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
9277ec681f3Smrg{
9287ec681f3Smrg   if (src1.type == MI_VALUE_TYPE_IMM) {
9297ec681f3Smrg      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
9307ec681f3Smrg      assert(mi_value_to_u64(src1) <= 32);
9317ec681f3Smrg   }
9327ec681f3Smrg
9337ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
9347ec681f3Smrg      return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));
9357ec681f3Smrg
9367ec681f3Smrg   return mi_math_binop(b, MI_ALU_SHR, src0, src1,
9377ec681f3Smrg                           MI_ALU_STORE, MI_ALU_ACCU);
9387ec681f3Smrg}
9397ec681f3Smrg
9407ec681f3Smrgstatic inline struct mi_value
9417ec681f3Smrgmi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
9427ec681f3Smrg{
9437ec681f3Smrg   if (shift == 0)
9447ec681f3Smrg      return src;
9457ec681f3Smrg
9467ec681f3Smrg   if (shift >= 64)
9477ec681f3Smrg      return mi_imm(0);
9487ec681f3Smrg
9497ec681f3Smrg   if (src.type == MI_VALUE_TYPE_IMM)
9507ec681f3Smrg      return mi_imm(mi_value_to_u64(src) >> shift);
9517ec681f3Smrg
9527ec681f3Smrg   struct mi_value res = mi_value_to_gpr(b, src);
9537ec681f3Smrg
9547ec681f3Smrg   /* Annoyingly, we only have power-of-two shifts */
9557ec681f3Smrg   while (shift) {
9567ec681f3Smrg      int bit = u_bit_scan(&shift);
9577ec681f3Smrg      assert(bit <= 5);
9587ec681f3Smrg      res = mi_ushr(b, res, mi_imm(1 << bit));
9597ec681f3Smrg   }
9607ec681f3Smrg
9617ec681f3Smrg   return res;
9627ec681f3Smrg}
9637ec681f3Smrg
9647ec681f3Smrgstatic inline struct mi_value
9657ec681f3Smrgmi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
9667ec681f3Smrg{
9677ec681f3Smrg   if (src1.type == MI_VALUE_TYPE_IMM) {
9687ec681f3Smrg      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
9697ec681f3Smrg      assert(mi_value_to_u64(src1) <= 32);
9707ec681f3Smrg   }
9717ec681f3Smrg
9727ec681f3Smrg   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
9737ec681f3Smrg      return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));
9747ec681f3Smrg
9757ec681f3Smrg   return mi_math_binop(b, MI_ALU_SAR, src0, src1,
9767ec681f3Smrg                            MI_ALU_STORE, MI_ALU_ACCU);
9777ec681f3Smrg}
9787ec681f3Smrg
9797ec681f3Smrgstatic inline struct mi_value
9807ec681f3Smrgmi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
9817ec681f3Smrg{
9827ec681f3Smrg   if (shift == 0)
9837ec681f3Smrg      return src;
9847ec681f3Smrg
9857ec681f3Smrg   if (shift >= 64)
9867ec681f3Smrg      return mi_imm(0);
9877ec681f3Smrg
9887ec681f3Smrg   if (src.type == MI_VALUE_TYPE_IMM)
9897ec681f3Smrg      return mi_imm((int64_t)mi_value_to_u64(src) >> shift);
9907ec681f3Smrg
9917ec681f3Smrg   struct mi_value res = mi_value_to_gpr(b, src);
9927ec681f3Smrg
9937ec681f3Smrg   /* Annoyingly, we only have power-of-two shifts */
9947ec681f3Smrg   while (shift) {
9957ec681f3Smrg      int bit = u_bit_scan(&shift);
9967ec681f3Smrg      assert(bit <= 5);
9977ec681f3Smrg      res = mi_ishr(b, res, mi_imm(1 << bit));
9987ec681f3Smrg   }
9997ec681f3Smrg
10007ec681f3Smrg   return res;
10017ec681f3Smrg}
10027ec681f3Smrg#endif /* if GFX_VERx10 >= 125 */
10037ec681f3Smrg
10047ec681f3Smrgstatic inline struct mi_value
10057ec681f3Smrgmi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
10067ec681f3Smrg{
10077ec681f3Smrg   if (src.type == MI_VALUE_TYPE_IMM)
10087ec681f3Smrg      return mi_imm(mi_value_to_u64(src) * N);
10097ec681f3Smrg
10107ec681f3Smrg   if (N == 0) {
10117ec681f3Smrg      mi_value_unref(b, src);
10127ec681f3Smrg      return mi_imm(0);
10137ec681f3Smrg   }
10147ec681f3Smrg
10157ec681f3Smrg   if (N == 1)
10167ec681f3Smrg      return src;
10177ec681f3Smrg
10187ec681f3Smrg   src = mi_value_to_gpr(b, src);
10197ec681f3Smrg
10207ec681f3Smrg   struct mi_value res = mi_value_ref(b, src);
10217ec681f3Smrg
10227ec681f3Smrg   unsigned top_bit = 31 - __builtin_clz(N);
10237ec681f3Smrg   for (int i = top_bit - 1; i >= 0; i--) {
10247ec681f3Smrg      res = mi_iadd(b, res, mi_value_ref(b, res));
10257ec681f3Smrg      if (N & (1 << i))
10267ec681f3Smrg         res = mi_iadd(b, res, mi_value_ref(b, src));
10277ec681f3Smrg   }
10287ec681f3Smrg
10297ec681f3Smrg   mi_value_unref(b, src);
10307ec681f3Smrg
10317ec681f3Smrg   return res;
10327ec681f3Smrg}
10337ec681f3Smrg
10347ec681f3Smrgstatic inline struct mi_value
10357ec681f3Smrgmi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
10367ec681f3Smrg{
10377ec681f3Smrg   if (shift == 0)
10387ec681f3Smrg      return src;
10397ec681f3Smrg
10407ec681f3Smrg   if (shift >= 64)
10417ec681f3Smrg      return mi_imm(0);
10427ec681f3Smrg
10437ec681f3Smrg   if (src.type == MI_VALUE_TYPE_IMM)
10447ec681f3Smrg      return mi_imm(mi_value_to_u64(src) << shift);
10457ec681f3Smrg
10467ec681f3Smrg   struct mi_value res = mi_value_to_gpr(b, src);
10477ec681f3Smrg
10487ec681f3Smrg#if GFX_VERx10 >= 125
10497ec681f3Smrg   /* Annoyingly, we only have power-of-two shifts */
10507ec681f3Smrg   while (shift) {
10517ec681f3Smrg      int bit = u_bit_scan(&shift);
10527ec681f3Smrg      assert(bit <= 5);
10537ec681f3Smrg      res = mi_ishl(b, res, mi_imm(1 << bit));
10547ec681f3Smrg   }
10557ec681f3Smrg#else
10567ec681f3Smrg   for (unsigned i = 0; i < shift; i++)
10577ec681f3Smrg      res = mi_iadd(b, res, mi_value_ref(b, res));
10587ec681f3Smrg#endif
10597ec681f3Smrg
10607ec681f3Smrg   return res;
10617ec681f3Smrg}
10627ec681f3Smrg
10637ec681f3Smrgstatic inline struct mi_value
10647ec681f3Smrgmi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
10657ec681f3Smrg{
10667ec681f3Smrg   if (shift == 0)
10677ec681f3Smrg      return src;
10687ec681f3Smrg
10697ec681f3Smrg   if (shift >= 64)
10707ec681f3Smrg      return mi_imm(0);
10717ec681f3Smrg
10727ec681f3Smrg   /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
10737ec681f3Smrg    * of the result.
10747ec681f3Smrg    */
10757ec681f3Smrg   if (src.type == MI_VALUE_TYPE_IMM)
10767ec681f3Smrg      return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);
10777ec681f3Smrg
10787ec681f3Smrg   if (shift > 32) {
10797ec681f3Smrg      struct mi_value tmp = mi_new_gpr(b);
10807ec681f3Smrg      _mi_copy_no_unref(b, mi_value_half(tmp, false),
10817ec681f3Smrg                               mi_value_half(src, true));
10827ec681f3Smrg      _mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));
10837ec681f3Smrg      mi_value_unref(b, src);
10847ec681f3Smrg      src = tmp;
10857ec681f3Smrg      shift -= 32;
10867ec681f3Smrg   }
10877ec681f3Smrg   assert(shift <= 32);
10887ec681f3Smrg   struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);
10897ec681f3Smrg   struct mi_value dst = mi_new_gpr(b);
10907ec681f3Smrg   _mi_copy_no_unref(b, mi_value_half(dst, false),
10917ec681f3Smrg                            mi_value_half(tmp, true));
10927ec681f3Smrg   _mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));
10937ec681f3Smrg   mi_value_unref(b, tmp);
10947ec681f3Smrg   return dst;
10957ec681f3Smrg}
10967ec681f3Smrg
10977ec681f3Smrgstatic inline struct mi_value
10987ec681f3Smrgmi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
10997ec681f3Smrg{
11007ec681f3Smrg   if (N.type == MI_VALUE_TYPE_IMM) {
11017ec681f3Smrg      assert(mi_value_to_u64(N) <= UINT32_MAX);
11027ec681f3Smrg      return mi_imm(mi_value_to_u64(N) / D);
11037ec681f3Smrg   }
11047ec681f3Smrg
11057ec681f3Smrg   /* We implicitly assume that N is only a 32-bit value */
11067ec681f3Smrg   if (D == 0) {
11077ec681f3Smrg      /* This is invalid but we should do something */
11087ec681f3Smrg      return mi_imm(0);
11097ec681f3Smrg   } else if (util_is_power_of_two_or_zero(D)) {
11107ec681f3Smrg      return mi_ushr32_imm(b, N, util_logbase2(D));
11117ec681f3Smrg   } else {
11127ec681f3Smrg      struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
11137ec681f3Smrg      assert(m.multiplier <= UINT32_MAX);
11147ec681f3Smrg
11157ec681f3Smrg      if (m.pre_shift)
11167ec681f3Smrg         N = mi_ushr32_imm(b, N, m.pre_shift);
11177ec681f3Smrg
11187ec681f3Smrg      /* Do the 32x32 multiply  into gpr0 */
11197ec681f3Smrg      N = mi_imul_imm(b, N, m.multiplier);
11207ec681f3Smrg
11217ec681f3Smrg      if (m.increment)
11227ec681f3Smrg         N = mi_iadd(b, N, mi_imm(m.multiplier));
11237ec681f3Smrg
11247ec681f3Smrg      N = mi_ushr32_imm(b, N, 32);
11257ec681f3Smrg
11267ec681f3Smrg      if (m.post_shift)
11277ec681f3Smrg         N = mi_ushr32_imm(b, N, m.post_shift);
11287ec681f3Smrg
11297ec681f3Smrg      return N;
11307ec681f3Smrg   }
11317ec681f3Smrg}
11327ec681f3Smrg
11337ec681f3Smrg#endif /* MI_MATH section */
11347ec681f3Smrg
11357ec681f3Smrg/* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */
11367ec681f3Smrg#if MI_BUILDER_CAN_WRITE_BATCH
11377ec681f3Smrg
/** Handle returned by mi_store_address().
 *
 * It records where, inside two already-emitted MI_STORE_REGISTER_MEM
 * commands, the memory-address field lives, so that the address can be
 * written later by _mi_resolve_address_token().
 */
struct mi_address_token {
   /* Pointers to address memory fields in the batch. */
   uint64_t *ptrs[2];
};
11427ec681f3Smrg
11437ec681f3Smrgstatic inline struct mi_address_token
11447ec681f3Smrgmi_store_address(struct mi_builder *b, struct mi_value addr_reg)
11457ec681f3Smrg{
11467ec681f3Smrg   mi_builder_flush_math(b);
11477ec681f3Smrg
11487ec681f3Smrg   assert(addr_reg.type == MI_VALUE_TYPE_REG64);
11497ec681f3Smrg
11507ec681f3Smrg   struct mi_address_token token = {};
11517ec681f3Smrg
11527ec681f3Smrg   for (unsigned i = 0; i < 2; i++) {
11537ec681f3Smrg      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
11547ec681f3Smrg         srm.RegisterAddress = addr_reg.reg + (i * 4);
11557ec681f3Smrg
11567ec681f3Smrg         const unsigned addr_dw =
11577ec681f3Smrg            GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
11587ec681f3Smrg         token.ptrs[i] = (void *)_dst + addr_dw;
11597ec681f3Smrg      }
11607ec681f3Smrg   }
11617ec681f3Smrg
11627ec681f3Smrg   mi_value_unref(b, addr_reg);
11637ec681f3Smrg   return token;
11647ec681f3Smrg}
11657ec681f3Smrg
11667ec681f3Smrgstatic inline void
11677ec681f3Smrgmi_self_mod_barrier(struct mi_builder *b)
11687ec681f3Smrg{
11697ec681f3Smrg   /* First make sure all the memory writes from previous modifying commands
11707ec681f3Smrg    * have landed. We want to do this before going through the CS cache,
11717ec681f3Smrg    * otherwise we could be fetching memory that hasn't been written to yet.
11727ec681f3Smrg    */
11737ec681f3Smrg   mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
11747ec681f3Smrg      pc.CommandStreamerStallEnable = true;
11757ec681f3Smrg   }
11767ec681f3Smrg   /* Documentation says Gfx11+ should be able to invalidate the command cache
11777ec681f3Smrg    * but experiment show it doesn't work properly, so for now just get over
11787ec681f3Smrg    * the CS prefetch.
11797ec681f3Smrg    */
11807ec681f3Smrg   for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++)
11817ec681f3Smrg      mi_builder_emit(b, GENX(MI_NOOP), noop);
11827ec681f3Smrg}
11837ec681f3Smrg
11847ec681f3Smrgstatic inline void
11857ec681f3Smrg_mi_resolve_address_token(struct mi_builder *b,
11867ec681f3Smrg                          struct mi_address_token token,
11877ec681f3Smrg                          void *batch_location)
11887ec681f3Smrg{
11897ec681f3Smrg   __gen_address_type addr = __gen_get_batch_address(b->user_data,
11907ec681f3Smrg                                                    batch_location);
11917ec681f3Smrg   uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,
11927ec681f3Smrg                                                  addr, 0);
11937ec681f3Smrg   *(token.ptrs[0]) = addr_addr_u64;
11947ec681f3Smrg   *(token.ptrs[1]) = addr_addr_u64 + 4;
11957ec681f3Smrg}
11967ec681f3Smrg
11977ec681f3Smrg#endif /* MI_BUILDER_CAN_WRITE_BATCH */
11987ec681f3Smrg
11997ec681f3Smrg#if GFX_VERx10 >= 125
12007ec681f3Smrg
12017ec681f3Smrg/*
12027ec681f3Smrg * Indirect load/store.  Only available on XE_HP+
12037ec681f3Smrg */
12047ec681f3Smrg
12057ec681f3SmrgMUST_CHECK static inline struct mi_value
12067ec681f3Smrgmi_load_mem64_offset(struct mi_builder *b,
12077ec681f3Smrg                     __gen_address_type addr, struct mi_value offset)
12087ec681f3Smrg{
12097ec681f3Smrg   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
12107ec681f3Smrg   struct mi_value addr_val = mi_imm(addr_u64);
12117ec681f3Smrg
12127ec681f3Smrg   struct mi_value dst = mi_new_gpr(b);
12137ec681f3Smrg
12147ec681f3Smrg   uint32_t dw[5];
12157ec681f3Smrg   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
12167ec681f3Smrg   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
12177ec681f3Smrg   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
12187ec681f3Smrg   dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);
12197ec681f3Smrg   dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);
12207ec681f3Smrg   _mi_builder_push_math(b, dw, 5);
12217ec681f3Smrg
12227ec681f3Smrg   mi_value_unref(b, addr_val);
12237ec681f3Smrg   mi_value_unref(b, offset);
12247ec681f3Smrg
12257ec681f3Smrg   return dst;
12267ec681f3Smrg}
12277ec681f3Smrg
12287ec681f3Smrgstatic inline void
12297ec681f3Smrgmi_store_mem64_offset(struct mi_builder *b,
12307ec681f3Smrg                          __gen_address_type addr, struct mi_value offset,
12317ec681f3Smrg                          struct mi_value data)
12327ec681f3Smrg{
12337ec681f3Smrg   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
12347ec681f3Smrg   struct mi_value addr_val = mi_imm(addr_u64);
12357ec681f3Smrg
12367ec681f3Smrg   data = mi_value_to_gpr(b, mi_resolve_invert(b, data));
12377ec681f3Smrg
12387ec681f3Smrg   uint32_t dw[5];
12397ec681f3Smrg   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
12407ec681f3Smrg   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
12417ec681f3Smrg   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
12427ec681f3Smrg   dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));
12437ec681f3Smrg   dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);
12447ec681f3Smrg   _mi_builder_push_math(b, dw, 5);
12457ec681f3Smrg
12467ec681f3Smrg   mi_value_unref(b, addr_val);
12477ec681f3Smrg   mi_value_unref(b, offset);
12487ec681f3Smrg   mi_value_unref(b, data);
12497ec681f3Smrg
12507ec681f3Smrg   /* This is the only math case which has side-effects outside of regular
12517ec681f3Smrg    * registers to flush math afterwards so we don't confuse anyone.
12527ec681f3Smrg    */
12537ec681f3Smrg   mi_builder_flush_math(b);
12547ec681f3Smrg}
12557ec681f3Smrg
12567ec681f3Smrg/*
12577ec681f3Smrg * Control-flow Section.  Only available on XE_HP+
12587ec681f3Smrg */
12597ec681f3Smrg
/** One goto that was emitted before its target was placed.
 *
 * mi_goto_if() reserves zeroed batch space for the jump and records it here;
 * mi_goto_target() later packs the real MI_BATCH_BUFFER_START into it.
 */
struct _mi_goto {
   /* Whether the jump must be emitted with predication enabled */
   bool predicated;
   /* Batch space reserved for the MI_BATCH_BUFFER_START command */
   void *mi_bbs;
};
12647ec681f3Smrg
/** A jump destination for mi_goto() / mi_goto_if().
 *
 * Gotos issued before the target is placed are queued in `gotos` and patched
 * by mi_goto_target(); gotos issued afterwards jump straight to `addr`.
 */
struct mi_goto_target {
   /* Set once mi_goto_target() has emitted the target */
   bool placed;
   /* Number of valid entries in gotos[] */
   unsigned num_gotos;
   /* Pending forward gotos; at most 8 per target (asserted in mi_goto_if) */
   struct _mi_goto gotos[8];
   /* Batch address of the target, valid once placed */
   __gen_address_type addr;
};
12717ec681f3Smrg
/* Initializer for a not-yet-placed target with no pending gotos */
#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})

/* Register offset that mi_goto_if() treats as MI_PREDICATE_RESULT */
#define MI_BUILDER_MI_PREDICATE_RESULT_num  0x2418
12757ec681f3Smrg
12767ec681f3Smrgstatic inline void
12777ec681f3Smrgmi_goto_if(struct mi_builder *b, struct mi_value cond,
12787ec681f3Smrg           struct mi_goto_target *t)
12797ec681f3Smrg{
12807ec681f3Smrg   /* First, set up the predicate, if any */
12817ec681f3Smrg   bool predicated;
12827ec681f3Smrg   if (cond.type == MI_VALUE_TYPE_IMM) {
12837ec681f3Smrg      /* If it's an immediate, the goto either doesn't happen or happens
12847ec681f3Smrg       * unconditionally.
12857ec681f3Smrg       */
12867ec681f3Smrg      if (mi_value_to_u64(cond) == 0)
12877ec681f3Smrg         return;
12887ec681f3Smrg
12897ec681f3Smrg      assert(mi_value_to_u64(cond) == ~0ull);
12907ec681f3Smrg      predicated = false;
12917ec681f3Smrg   } else if (mi_value_is_reg(cond) &&
12927ec681f3Smrg              cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {
12937ec681f3Smrg      /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client
12947ec681f3Smrg       * provided us with
12957ec681f3Smrg       */
12967ec681f3Smrg      assert(cond.type == MI_VALUE_TYPE_REG32);
12977ec681f3Smrg      predicated = true;
12987ec681f3Smrg   } else {
12997ec681f3Smrg      mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);
13007ec681f3Smrg      predicated = true;
13017ec681f3Smrg   }
13027ec681f3Smrg
13037ec681f3Smrg   if (predicated) {
13047ec681f3Smrg      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
13057ec681f3Smrg         sp.PredicateEnable = NOOPOnResultClear;
13067ec681f3Smrg      }
13077ec681f3Smrg   }
13087ec681f3Smrg   if (t->placed) {
13097ec681f3Smrg      mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {
13107ec681f3Smrg         bbs.PredicationEnable         = predicated;
13117ec681f3Smrg         bbs.AddressSpaceIndicator     = ASI_PPGTT;
13127ec681f3Smrg         bbs.BatchBufferStartAddress   = t->addr;
13137ec681f3Smrg      }
13147ec681f3Smrg   } else {
13157ec681f3Smrg      assert(t->num_gotos < ARRAY_SIZE(t->gotos));
13167ec681f3Smrg      struct _mi_goto g = {
13177ec681f3Smrg         .predicated = predicated,
13187ec681f3Smrg         .mi_bbs = __gen_get_batch_dwords(b->user_data,
13197ec681f3Smrg                                          GENX(MI_BATCH_BUFFER_START_length)),
13207ec681f3Smrg      };
13217ec681f3Smrg      memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));
13227ec681f3Smrg      t->gotos[t->num_gotos++] = g;
13237ec681f3Smrg   }
13247ec681f3Smrg   if (predicated) {
13257ec681f3Smrg      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
13267ec681f3Smrg         sp.PredicateEnable = NOOPNever;
13277ec681f3Smrg      }
13287ec681f3Smrg   }
13297ec681f3Smrg}
13307ec681f3Smrg
13317ec681f3Smrgstatic inline void
13327ec681f3Smrgmi_goto(struct mi_builder *b, struct mi_goto_target *t)
13337ec681f3Smrg{
13347ec681f3Smrg   mi_goto_if(b, mi_imm(-1), t);
13357ec681f3Smrg}
13367ec681f3Smrg
13377ec681f3Smrgstatic inline void
13387ec681f3Smrgmi_goto_target(struct mi_builder *b, struct mi_goto_target *t)
13397ec681f3Smrg{
13407ec681f3Smrg   mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
13417ec681f3Smrg      sp.PredicateEnable = NOOPNever;
13427ec681f3Smrg      t->addr = __gen_get_batch_address(b->user_data, _dst);
13437ec681f3Smrg   }
13447ec681f3Smrg   t->placed = true;
13457ec681f3Smrg
13467ec681f3Smrg   struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };
13477ec681f3Smrg   bbs.AddressSpaceIndicator     = ASI_PPGTT;
13487ec681f3Smrg   bbs.BatchBufferStartAddress   = t->addr;
13497ec681f3Smrg
13507ec681f3Smrg   for (unsigned i = 0; i < t->num_gotos; i++) {
13517ec681f3Smrg      bbs.PredicationEnable = t->gotos[i].predicated;
13527ec681f3Smrg      GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);
13537ec681f3Smrg   }
13547ec681f3Smrg}
13557ec681f3Smrg
13567ec681f3Smrgstatic inline struct mi_goto_target
13577ec681f3Smrgmi_goto_target_init_and_place(struct mi_builder *b)
13587ec681f3Smrg{
13597ec681f3Smrg   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
13607ec681f3Smrg   mi_goto_target(b, &t);
13617ec681f3Smrg   return t;
13627ec681f3Smrg}
13637ec681f3Smrg
/* Opens a GPU-side loop; use as `mi_loop(b) { ... }`.
 *
 * Two targets are declared for the body: `__continue`, placed right away at
 * the top of the loop, and `__break`, initially unplaced.  When the body has
 * been recorded, a goto back to `__continue` is emitted and `__break` is
 * placed after it, which also ends the C for-loop (its condition is
 * `!__break.placed`), so the body is recorded exactly once.
 */
#define mi_loop(b) \
   for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \
        __continue = mi_goto_target_init_and_place(b); !__break.placed; \
        mi_goto(b, &__continue), mi_goto_target(b, &__break))

/* Jump helpers, only meaningful inside a mi_loop() body: they refer to the
 * `__break` / `__continue` targets declared by that macro.
 */
#define mi_break(b) mi_goto(b, &__break)
#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break)
#define mi_continue(b) mi_goto(b, &__continue)
#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue)
13737ec681f3Smrg
13747ec681f3Smrg#endif /* GFX_VERx10 >= 125 */
13757ec681f3Smrg
13767ec681f3Smrg#endif /* MI_BUILDER_H */
1377