17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2019 Intel Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#ifndef MI_BUILDER_H 257ec681f3Smrg#define MI_BUILDER_H 267ec681f3Smrg 277ec681f3Smrg#include "dev/intel_device_info.h" 287ec681f3Smrg#include "genxml/genX_bits.h" 297ec681f3Smrg#include "util/bitscan.h" 307ec681f3Smrg#include "util/fast_idiv_by_const.h" 317ec681f3Smrg#include "util/u_math.h" 327ec681f3Smrg 337ec681f3Smrg#ifndef MI_BUILDER_NUM_ALLOC_GPRS 347ec681f3Smrg/** The number of GPRs the MI builder is allowed to allocate 357ec681f3Smrg * 367ec681f3Smrg * This may be set by a user of this API so that it can reserve some GPRs at 377ec681f3Smrg * the top end for its own use. 387ec681f3Smrg */ 397ec681f3Smrg#define MI_BUILDER_NUM_ALLOC_GPRS 16 407ec681f3Smrg#endif 417ec681f3Smrg 427ec681f3Smrg/** These must be defined by the user of the builder 437ec681f3Smrg * 447ec681f3Smrg * void *__gen_get_batch_dwords(__gen_user_data *user_data, 457ec681f3Smrg * unsigned num_dwords); 467ec681f3Smrg * 477ec681f3Smrg * __gen_address_type 487ec681f3Smrg * __gen_address_offset(__gen_address_type addr, uint64_t offset); 497ec681f3Smrg * 507ec681f3Smrg * 517ec681f3Smrg * If self-modifying batches are supported, we must be able to pass batch 527ec681f3Smrg * addresses around as void*s so pinning as well as batch chaining or some 537ec681f3Smrg * other mechanism for ensuring batch pointers remain valid during building is 547ec681f3Smrg * required. The following function must also be defined, it returns an 557ec681f3Smrg * address in canonical form: 567ec681f3Smrg * 577ec681f3Smrg * __gen_address_type 587ec681f3Smrg * __gen_get_batch_address(__gen_user_data *user_data, void *location); 597ec681f3Smrg * 607ec681f3Smrg * Also, __gen_combine_address must accept a location value of NULL and return 617ec681f3Smrg * a fully valid 64-bit address. 627ec681f3Smrg */ 637ec681f3Smrg 647ec681f3Smrg/* 657ec681f3Smrg * Start of the actual MI builder 667ec681f3Smrg */ 677ec681f3Smrg 687ec681f3Smrg#define __genxml_cmd_length(cmd) cmd ## _length 697ec681f3Smrg#define __genxml_cmd_header(cmd) cmd ## _header 707ec681f3Smrg#define __genxml_cmd_pack(cmd) cmd ## _pack 717ec681f3Smrg 727ec681f3Smrg#define mi_builder_pack(b, cmd, dst, name) \ 737ec681f3Smrg for (struct cmd name = { __genxml_cmd_header(cmd) }, \ 747ec681f3Smrg *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \ 757ec681f3Smrg __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name), \ 767ec681f3Smrg _dst = NULL) 777ec681f3Smrg 787ec681f3Smrg#define mi_builder_emit(b, cmd, name) \ 797ec681f3Smrg mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name) 807ec681f3Smrg 817ec681f3Smrg 827ec681f3Smrgenum mi_value_type { 837ec681f3Smrg MI_VALUE_TYPE_IMM, 847ec681f3Smrg MI_VALUE_TYPE_MEM32, 857ec681f3Smrg MI_VALUE_TYPE_MEM64, 867ec681f3Smrg MI_VALUE_TYPE_REG32, 877ec681f3Smrg MI_VALUE_TYPE_REG64, 887ec681f3Smrg}; 897ec681f3Smrg 907ec681f3Smrgstruct mi_value { 917ec681f3Smrg enum mi_value_type type; 927ec681f3Smrg 937ec681f3Smrg union { 947ec681f3Smrg uint64_t imm; 957ec681f3Smrg __gen_address_type addr; 967ec681f3Smrg uint32_t reg; 977ec681f3Smrg }; 987ec681f3Smrg 997ec681f3Smrg#if GFX_VERx10 >= 75 1007ec681f3Smrg bool invert; 1017ec681f3Smrg#endif 1027ec681f3Smrg}; 1037ec681f3Smrg 1047ec681f3Smrgstruct mi_reg_num { 1057ec681f3Smrg uint32_t num; 1067ec681f3Smrg#if GFX_VER >= 11 1077ec681f3Smrg bool cs; 1087ec681f3Smrg#endif 1097ec681f3Smrg}; 1107ec681f3Smrg 1117ec681f3Smrgstatic inline struct mi_reg_num 1127ec681f3Smrgmi_adjust_reg_num(uint32_t reg) 1137ec681f3Smrg{ 1147ec681f3Smrg#if GFX_VER >= 11 1157ec681f3Smrg bool cs = reg >= 0x2000 && reg < 0x4000; 1167ec681f3Smrg return (struct mi_reg_num) { 1177ec681f3Smrg .num = reg - (cs ? 0x2000 : 0), 1187ec681f3Smrg .cs = cs, 1197ec681f3Smrg }; 1207ec681f3Smrg#else 1217ec681f3Smrg return (struct mi_reg_num) { .num = reg, }; 1227ec681f3Smrg#endif 1237ec681f3Smrg} 1247ec681f3Smrg 1257ec681f3Smrg#if GFX_VER >= 9 1267ec681f3Smrg#define MI_BUILDER_MAX_MATH_DWORDS 256 1277ec681f3Smrg#else 1287ec681f3Smrg#define MI_BUILDER_MAX_MATH_DWORDS 64 1297ec681f3Smrg#endif 1307ec681f3Smrg 1317ec681f3Smrgstruct mi_builder { 1327ec681f3Smrg const struct intel_device_info *devinfo; 1337ec681f3Smrg __gen_user_data *user_data; 1347ec681f3Smrg 1357ec681f3Smrg#if GFX_VERx10 >= 75 1367ec681f3Smrg uint32_t gprs; 1377ec681f3Smrg uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS]; 1387ec681f3Smrg 1397ec681f3Smrg unsigned num_math_dwords; 1407ec681f3Smrg uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS]; 1417ec681f3Smrg#endif 1427ec681f3Smrg}; 1437ec681f3Smrg 1447ec681f3Smrgstatic inline void 1457ec681f3Smrgmi_builder_init(struct mi_builder *b, 1467ec681f3Smrg const struct intel_device_info *devinfo, 1477ec681f3Smrg __gen_user_data *user_data) 1487ec681f3Smrg{ 1497ec681f3Smrg memset(b, 0, sizeof(*b)); 1507ec681f3Smrg b->devinfo = devinfo; 1517ec681f3Smrg b->user_data = user_data; 1527ec681f3Smrg 1537ec681f3Smrg#if GFX_VERx10 >= 75 1547ec681f3Smrg b->gprs = 0; 1557ec681f3Smrg b->num_math_dwords = 0; 1567ec681f3Smrg#endif 1577ec681f3Smrg} 1587ec681f3Smrg 1597ec681f3Smrgstatic inline void 1607ec681f3Smrgmi_builder_flush_math(struct mi_builder *b) 1617ec681f3Smrg{ 1627ec681f3Smrg#if GFX_VERx10 >= 75 1637ec681f3Smrg if (b->num_math_dwords == 0) 1647ec681f3Smrg return; 1657ec681f3Smrg 1667ec681f3Smrg uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data, 1677ec681f3Smrg 1 + b->num_math_dwords); 1687ec681f3Smrg mi_builder_pack(b, GENX(MI_MATH), dw, math) { 1697ec681f3Smrg math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias); 1707ec681f3Smrg } 1717ec681f3Smrg memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t)); 1727ec681f3Smrg b->num_math_dwords = 0; 1737ec681f3Smrg#endif 1747ec681f3Smrg} 1757ec681f3Smrg 1767ec681f3Smrg#define _MI_BUILDER_GPR_BASE 0x2600 1777ec681f3Smrg/* The actual hardware limit on GPRs */ 1787ec681f3Smrg#define _MI_BUILDER_NUM_HW_GPRS 16 1797ec681f3Smrg 1807ec681f3Smrg#if GFX_VERx10 >= 75 1817ec681f3Smrg 1827ec681f3Smrgstatic inline bool 1837ec681f3Smrgmi_value_is_reg(struct mi_value val) 1847ec681f3Smrg{ 1857ec681f3Smrg return val.type == MI_VALUE_TYPE_REG32 || 1867ec681f3Smrg val.type == MI_VALUE_TYPE_REG64; 1877ec681f3Smrg} 1887ec681f3Smrg 1897ec681f3Smrgstatic inline bool 1907ec681f3Smrgmi_value_is_gpr(struct mi_value val) 1917ec681f3Smrg{ 1927ec681f3Smrg return mi_value_is_reg(val) && 1937ec681f3Smrg val.reg >= _MI_BUILDER_GPR_BASE && 1947ec681f3Smrg val.reg < _MI_BUILDER_GPR_BASE + 1957ec681f3Smrg _MI_BUILDER_NUM_HW_GPRS * 8; 1967ec681f3Smrg} 1977ec681f3Smrg 1987ec681f3Smrgstatic inline bool 1997ec681f3Smrg_mi_value_is_allocated_gpr(struct mi_value val) 2007ec681f3Smrg{ 2017ec681f3Smrg return mi_value_is_reg(val) && 2027ec681f3Smrg val.reg >= _MI_BUILDER_GPR_BASE && 2037ec681f3Smrg val.reg < _MI_BUILDER_GPR_BASE + 2047ec681f3Smrg MI_BUILDER_NUM_ALLOC_GPRS * 8; 2057ec681f3Smrg} 2067ec681f3Smrg 2077ec681f3Smrgstatic inline uint32_t 2087ec681f3Smrg_mi_value_as_gpr(struct mi_value val) 2097ec681f3Smrg{ 2107ec681f3Smrg assert(mi_value_is_gpr(val)); 2117ec681f3Smrg assert(val.reg % 8 == 0); 2127ec681f3Smrg return (val.reg - _MI_BUILDER_GPR_BASE) / 8; 2137ec681f3Smrg} 2147ec681f3Smrg 2157ec681f3Smrgstatic inline struct mi_value 2167ec681f3Smrgmi_new_gpr(struct mi_builder *b) 2177ec681f3Smrg{ 2187ec681f3Smrg unsigned gpr = ffs(~b->gprs) - 1; 2197ec681f3Smrg assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS); 2207ec681f3Smrg assert(b->gpr_refs[gpr] == 0); 2217ec681f3Smrg b->gprs |= (1u << gpr); 2227ec681f3Smrg b->gpr_refs[gpr] = 1; 2237ec681f3Smrg 2247ec681f3Smrg return (struct mi_value) { 2257ec681f3Smrg .type = MI_VALUE_TYPE_REG64, 2267ec681f3Smrg .reg = _MI_BUILDER_GPR_BASE + gpr * 8, 2277ec681f3Smrg }; 2287ec681f3Smrg} 2297ec681f3Smrg#endif /* GFX_VERx10 >= 75 */ 2307ec681f3Smrg 2317ec681f3Smrg/** Take a reference to a mi_value 2327ec681f3Smrg * 2337ec681f3Smrg * The MI builder uses reference counting to automatically free ALU GPRs for 2347ec681f3Smrg * re-use in calculations. All mi_* math functions consume the reference 2357ec681f3Smrg * they are handed for each source and return a reference to a value which the 2367ec681f3Smrg * caller must consume. In particular, if you pas the same value into a 2377ec681f3Smrg * single mi_* math function twice (say to add a number to itself), you 2387ec681f3Smrg * are responsible for calling mi_value_ref() to get a second reference 2397ec681f3Smrg * because the mi_* math function will consume it twice. 2407ec681f3Smrg */ 2417ec681f3Smrgstatic inline struct mi_value 2427ec681f3Smrgmi_value_ref(struct mi_builder *b, struct mi_value val) 2437ec681f3Smrg{ 2447ec681f3Smrg#if GFX_VERx10 >= 75 2457ec681f3Smrg if (_mi_value_is_allocated_gpr(val)) { 2467ec681f3Smrg unsigned gpr = _mi_value_as_gpr(val); 2477ec681f3Smrg assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS); 2487ec681f3Smrg assert(b->gprs & (1u << gpr)); 2497ec681f3Smrg assert(b->gpr_refs[gpr] < UINT8_MAX); 2507ec681f3Smrg b->gpr_refs[gpr]++; 2517ec681f3Smrg } 2527ec681f3Smrg#endif /* GFX_VERx10 >= 75 */ 2537ec681f3Smrg 2547ec681f3Smrg return val; 2557ec681f3Smrg} 2567ec681f3Smrg 2577ec681f3Smrg/** Drop a reference to a mi_value 2587ec681f3Smrg * 2597ec681f3Smrg * See also mi_value_ref. 2607ec681f3Smrg */ 2617ec681f3Smrgstatic inline void 2627ec681f3Smrgmi_value_unref(struct mi_builder *b, struct mi_value val) 2637ec681f3Smrg{ 2647ec681f3Smrg#if GFX_VERx10 >= 75 2657ec681f3Smrg if (_mi_value_is_allocated_gpr(val)) { 2667ec681f3Smrg unsigned gpr = _mi_value_as_gpr(val); 2677ec681f3Smrg assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS); 2687ec681f3Smrg assert(b->gprs & (1u << gpr)); 2697ec681f3Smrg assert(b->gpr_refs[gpr] > 0); 2707ec681f3Smrg if (--b->gpr_refs[gpr] == 0) 2717ec681f3Smrg b->gprs &= ~(1u << gpr); 2727ec681f3Smrg } 2737ec681f3Smrg#endif /* GFX_VERx10 >= 75 */ 2747ec681f3Smrg} 2757ec681f3Smrg 2767ec681f3Smrgstatic inline struct mi_value 2777ec681f3Smrgmi_imm(uint64_t imm) 2787ec681f3Smrg{ 2797ec681f3Smrg return (struct mi_value) { 2807ec681f3Smrg .type = MI_VALUE_TYPE_IMM, 2817ec681f3Smrg .imm = imm, 2827ec681f3Smrg }; 2837ec681f3Smrg} 2847ec681f3Smrg 2857ec681f3Smrgstatic inline struct mi_value 2867ec681f3Smrgmi_reg32(uint32_t reg) 2877ec681f3Smrg{ 2887ec681f3Smrg struct mi_value val = { 2897ec681f3Smrg .type = MI_VALUE_TYPE_REG32, 2907ec681f3Smrg .reg = reg, 2917ec681f3Smrg }; 2927ec681f3Smrg#if GFX_VERx10 >= 75 2937ec681f3Smrg assert(!_mi_value_is_allocated_gpr(val)); 2947ec681f3Smrg#endif 2957ec681f3Smrg return val; 2967ec681f3Smrg} 2977ec681f3Smrg 2987ec681f3Smrgstatic inline struct mi_value 2997ec681f3Smrgmi_reg64(uint32_t reg) 3007ec681f3Smrg{ 3017ec681f3Smrg struct mi_value val = { 3027ec681f3Smrg .type = MI_VALUE_TYPE_REG64, 3037ec681f3Smrg .reg = reg, 3047ec681f3Smrg }; 3057ec681f3Smrg#if GFX_VERx10 >= 75 3067ec681f3Smrg assert(!_mi_value_is_allocated_gpr(val)); 3077ec681f3Smrg#endif 3087ec681f3Smrg return val; 3097ec681f3Smrg} 3107ec681f3Smrg 3117ec681f3Smrgstatic inline struct mi_value 3127ec681f3Smrgmi_mem32(__gen_address_type addr) 3137ec681f3Smrg{ 3147ec681f3Smrg return (struct mi_value) { 3157ec681f3Smrg .type = MI_VALUE_TYPE_MEM32, 3167ec681f3Smrg .addr = addr, 3177ec681f3Smrg }; 3187ec681f3Smrg} 3197ec681f3Smrg 3207ec681f3Smrgstatic inline struct mi_value 3217ec681f3Smrgmi_mem64(__gen_address_type addr) 3227ec681f3Smrg{ 3237ec681f3Smrg return (struct mi_value) { 3247ec681f3Smrg .type = MI_VALUE_TYPE_MEM64, 3257ec681f3Smrg .addr = addr, 3267ec681f3Smrg }; 3277ec681f3Smrg} 3287ec681f3Smrg 3297ec681f3Smrgstatic inline struct mi_value 3307ec681f3Smrgmi_value_half(struct mi_value value, bool top_32_bits) 3317ec681f3Smrg{ 3327ec681f3Smrg switch (value.type) { 3337ec681f3Smrg case MI_VALUE_TYPE_IMM: 3347ec681f3Smrg if (top_32_bits) 3357ec681f3Smrg value.imm >>= 32; 3367ec681f3Smrg else 3377ec681f3Smrg value.imm &= 0xffffffffu; 3387ec681f3Smrg return value; 3397ec681f3Smrg 3407ec681f3Smrg case MI_VALUE_TYPE_MEM32: 3417ec681f3Smrg assert(!top_32_bits); 3427ec681f3Smrg return value; 3437ec681f3Smrg 3447ec681f3Smrg case MI_VALUE_TYPE_MEM64: 3457ec681f3Smrg if (top_32_bits) 3467ec681f3Smrg value.addr = __gen_address_offset(value.addr, 4); 3477ec681f3Smrg value.type = MI_VALUE_TYPE_MEM32; 3487ec681f3Smrg return value; 3497ec681f3Smrg 3507ec681f3Smrg case MI_VALUE_TYPE_REG32: 3517ec681f3Smrg assert(!top_32_bits); 3527ec681f3Smrg return value; 3537ec681f3Smrg 3547ec681f3Smrg case MI_VALUE_TYPE_REG64: 3557ec681f3Smrg if (top_32_bits) 3567ec681f3Smrg value.reg += 4; 3577ec681f3Smrg value.type = MI_VALUE_TYPE_REG32; 3587ec681f3Smrg return value; 3597ec681f3Smrg } 3607ec681f3Smrg 3617ec681f3Smrg unreachable("Invalid mi_value type"); 3627ec681f3Smrg} 3637ec681f3Smrg 3647ec681f3Smrgstatic inline void 3657ec681f3Smrg_mi_copy_no_unref(struct mi_builder *b, 3667ec681f3Smrg struct mi_value dst, struct mi_value src) 3677ec681f3Smrg{ 3687ec681f3Smrg#if GFX_VERx10 >= 75 3697ec681f3Smrg /* TODO: We could handle src.invert by emitting a bit of math if we really 3707ec681f3Smrg * wanted to. 3717ec681f3Smrg */ 3727ec681f3Smrg assert(!dst.invert && !src.invert); 3737ec681f3Smrg#endif 3747ec681f3Smrg mi_builder_flush_math(b); 3757ec681f3Smrg 3767ec681f3Smrg switch (dst.type) { 3777ec681f3Smrg case MI_VALUE_TYPE_IMM: 3787ec681f3Smrg unreachable("Cannot copy to an immediate"); 3797ec681f3Smrg 3807ec681f3Smrg case MI_VALUE_TYPE_MEM64: 3817ec681f3Smrg case MI_VALUE_TYPE_REG64: 3827ec681f3Smrg switch (src.type) { 3837ec681f3Smrg case MI_VALUE_TYPE_IMM: 3847ec681f3Smrg if (dst.type == MI_VALUE_TYPE_REG64) { 3857ec681f3Smrg uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data, 3867ec681f3Smrg GENX(MI_LOAD_REGISTER_IMM_length) + 2); 3877ec681f3Smrg struct mi_reg_num reg = mi_adjust_reg_num(dst.reg); 3887ec681f3Smrg mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) { 3897ec681f3Smrg lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 - 3907ec681f3Smrg GENX(MI_LOAD_REGISTER_IMM_length_bias); 3917ec681f3Smrg#if GFX_VER >= 11 3927ec681f3Smrg lri.AddCSMMIOStartOffset = reg.cs; 3937ec681f3Smrg#endif 3947ec681f3Smrg } 3957ec681f3Smrg dw[1] = reg.num; 3967ec681f3Smrg dw[2] = src.imm; 3977ec681f3Smrg dw[3] = reg.num + 4; 3987ec681f3Smrg dw[4] = src.imm >> 32; 3997ec681f3Smrg } else { 4007ec681f3Smrg#if GFX_VER >= 8 4017ec681f3Smrg assert(dst.type == MI_VALUE_TYPE_MEM64); 4027ec681f3Smrg uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data, 4037ec681f3Smrg GENX(MI_STORE_DATA_IMM_length) + 1); 4047ec681f3Smrg mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) { 4057ec681f3Smrg sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 - 4067ec681f3Smrg GENX(MI_STORE_DATA_IMM_length_bias); 4077ec681f3Smrg sdm.StoreQword = true; 4087ec681f3Smrg sdm.Address = dst.addr; 4097ec681f3Smrg } 4107ec681f3Smrg dw[3] = src.imm; 4117ec681f3Smrg dw[4] = src.imm >> 32; 4127ec681f3Smrg#else 4137ec681f3Smrg _mi_copy_no_unref(b, mi_value_half(dst, false), 4147ec681f3Smrg mi_value_half(src, false)); 4157ec681f3Smrg _mi_copy_no_unref(b, mi_value_half(dst, true), 4167ec681f3Smrg mi_value_half(src, true)); 4177ec681f3Smrg#endif 4187ec681f3Smrg } 4197ec681f3Smrg break; 4207ec681f3Smrg case MI_VALUE_TYPE_REG32: 4217ec681f3Smrg case MI_VALUE_TYPE_MEM32: 4227ec681f3Smrg _mi_copy_no_unref(b, mi_value_half(dst, false), 4237ec681f3Smrg mi_value_half(src, false)); 4247ec681f3Smrg _mi_copy_no_unref(b, mi_value_half(dst, true), 4257ec681f3Smrg mi_imm(0)); 4267ec681f3Smrg break; 4277ec681f3Smrg case MI_VALUE_TYPE_REG64: 4287ec681f3Smrg case MI_VALUE_TYPE_MEM64: 4297ec681f3Smrg _mi_copy_no_unref(b, mi_value_half(dst, false), 4307ec681f3Smrg mi_value_half(src, false)); 4317ec681f3Smrg _mi_copy_no_unref(b, mi_value_half(dst, true), 4327ec681f3Smrg mi_value_half(src, true)); 4337ec681f3Smrg break; 4347ec681f3Smrg default: 4357ec681f3Smrg unreachable("Invalid mi_value type"); 4367ec681f3Smrg } 4377ec681f3Smrg break; 4387ec681f3Smrg 4397ec681f3Smrg case MI_VALUE_TYPE_MEM32: 4407ec681f3Smrg switch (src.type) { 4417ec681f3Smrg case MI_VALUE_TYPE_IMM: 4427ec681f3Smrg mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) { 4437ec681f3Smrg sdi.Address = dst.addr; 4447ec681f3Smrg#if GFX_VER >= 12 4457ec681f3Smrg sdi.ForceWriteCompletionCheck = true; 4467ec681f3Smrg#endif 4477ec681f3Smrg sdi.ImmediateData = src.imm; 4487ec681f3Smrg } 4497ec681f3Smrg break; 4507ec681f3Smrg 4517ec681f3Smrg case MI_VALUE_TYPE_MEM32: 4527ec681f3Smrg case MI_VALUE_TYPE_MEM64: 4537ec681f3Smrg#if GFX_VER >= 8 4547ec681f3Smrg mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) { 4557ec681f3Smrg cmm.DestinationMemoryAddress = dst.addr; 4567ec681f3Smrg cmm.SourceMemoryAddress = src.addr; 4577ec681f3Smrg } 4587ec681f3Smrg#elif GFX_VERx10 == 75 4597ec681f3Smrg { 4607ec681f3Smrg struct mi_value tmp = mi_new_gpr(b); 4617ec681f3Smrg _mi_copy_no_unref(b, tmp, src); 4627ec681f3Smrg _mi_copy_no_unref(b, dst, tmp); 4637ec681f3Smrg mi_value_unref(b, tmp); 4647ec681f3Smrg } 4657ec681f3Smrg#else 4667ec681f3Smrg unreachable("Cannot do mem <-> mem copy on IVB and earlier"); 4677ec681f3Smrg#endif 4687ec681f3Smrg break; 4697ec681f3Smrg 4707ec681f3Smrg case MI_VALUE_TYPE_REG32: 4717ec681f3Smrg case MI_VALUE_TYPE_REG64: 4727ec681f3Smrg mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { 4737ec681f3Smrg struct mi_reg_num reg = mi_adjust_reg_num(src.reg); 4747ec681f3Smrg srm.RegisterAddress = reg.num; 4757ec681f3Smrg#if GFX_VER >= 11 4767ec681f3Smrg srm.AddCSMMIOStartOffset = reg.cs; 4777ec681f3Smrg#endif 4787ec681f3Smrg srm.MemoryAddress = dst.addr; 4797ec681f3Smrg } 4807ec681f3Smrg break; 4817ec681f3Smrg 4827ec681f3Smrg default: 4837ec681f3Smrg unreachable("Invalid mi_value type"); 4847ec681f3Smrg } 4857ec681f3Smrg break; 4867ec681f3Smrg 4877ec681f3Smrg case MI_VALUE_TYPE_REG32: 4887ec681f3Smrg switch (src.type) { 4897ec681f3Smrg case MI_VALUE_TYPE_IMM: 4907ec681f3Smrg mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) { 4917ec681f3Smrg struct mi_reg_num reg = mi_adjust_reg_num(dst.reg); 4927ec681f3Smrg lri.RegisterOffset = reg.num; 4937ec681f3Smrg#if GFX_VER >= 11 4947ec681f3Smrg lri.AddCSMMIOStartOffset = reg.cs; 4957ec681f3Smrg#endif 4967ec681f3Smrg lri.DataDWord = src.imm; 4977ec681f3Smrg } 4987ec681f3Smrg break; 4997ec681f3Smrg 5007ec681f3Smrg case MI_VALUE_TYPE_MEM32: 5017ec681f3Smrg case MI_VALUE_TYPE_MEM64: 5027ec681f3Smrg#if GFX_VER >= 7 5037ec681f3Smrg mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) { 5047ec681f3Smrg struct mi_reg_num reg = mi_adjust_reg_num(dst.reg); 5057ec681f3Smrg lrm.RegisterAddress = reg.num; 5067ec681f3Smrg#if GFX_VER >= 11 5077ec681f3Smrg lrm.AddCSMMIOStartOffset = reg.cs; 5087ec681f3Smrg#endif 5097ec681f3Smrg lrm.MemoryAddress = src.addr; 5107ec681f3Smrg } 5117ec681f3Smrg#else 5127ec681f3Smrg unreachable("Cannot load do mem -> reg copy on SNB and earlier"); 5137ec681f3Smrg#endif 5147ec681f3Smrg break; 5157ec681f3Smrg 5167ec681f3Smrg case MI_VALUE_TYPE_REG32: 5177ec681f3Smrg case MI_VALUE_TYPE_REG64: 5187ec681f3Smrg#if GFX_VERx10 >= 75 5197ec681f3Smrg if (src.reg != dst.reg) { 5207ec681f3Smrg mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) { 5217ec681f3Smrg struct mi_reg_num reg = mi_adjust_reg_num(src.reg); 5227ec681f3Smrg lrr.SourceRegisterAddress = reg.num; 5237ec681f3Smrg#if GFX_VER >= 11 5247ec681f3Smrg lrr.AddCSMMIOStartOffsetSource = reg.cs; 5257ec681f3Smrg#endif 5267ec681f3Smrg reg = mi_adjust_reg_num(dst.reg); 5277ec681f3Smrg lrr.DestinationRegisterAddress = reg.num; 5287ec681f3Smrg#if GFX_VER >= 11 5297ec681f3Smrg lrr.AddCSMMIOStartOffsetDestination = reg.cs; 5307ec681f3Smrg#endif 5317ec681f3Smrg } 5327ec681f3Smrg } 5337ec681f3Smrg#else 5347ec681f3Smrg unreachable("Cannot do reg <-> reg copy on IVB and earlier"); 5357ec681f3Smrg#endif 5367ec681f3Smrg break; 5377ec681f3Smrg 5387ec681f3Smrg default: 5397ec681f3Smrg unreachable("Invalid mi_value type"); 5407ec681f3Smrg } 5417ec681f3Smrg break; 5427ec681f3Smrg 5437ec681f3Smrg default: 5447ec681f3Smrg unreachable("Invalid mi_value type"); 5457ec681f3Smrg } 5467ec681f3Smrg} 5477ec681f3Smrg 5487ec681f3Smrg#if GFX_VERx10 >= 75 5497ec681f3Smrgstatic inline struct mi_value 5507ec681f3Smrgmi_resolve_invert(struct mi_builder *b, struct mi_value src); 5517ec681f3Smrg#endif 5527ec681f3Smrg 5537ec681f3Smrg/** Store the value in src to the value represented by dst 5547ec681f3Smrg * 5557ec681f3Smrg * If the bit size of src and dst mismatch, this function does an unsigned 5567ec681f3Smrg * integer cast. If src has more bits than dst, it takes the bottom bits. If 5577ec681f3Smrg * src has fewer bits then dst, it fills the top bits with zeros. 5587ec681f3Smrg * 5597ec681f3Smrg * This function consumes one reference for each of src and dst. 5607ec681f3Smrg */ 5617ec681f3Smrgstatic inline void 5627ec681f3Smrgmi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src) 5637ec681f3Smrg{ 5647ec681f3Smrg#if GFX_VERx10 >= 75 5657ec681f3Smrg src = mi_resolve_invert(b, src); 5667ec681f3Smrg#endif 5677ec681f3Smrg _mi_copy_no_unref(b, dst, src); 5687ec681f3Smrg mi_value_unref(b, src); 5697ec681f3Smrg mi_value_unref(b, dst); 5707ec681f3Smrg} 5717ec681f3Smrg 5727ec681f3Smrgstatic inline void 5737ec681f3Smrgmi_memset(struct mi_builder *b, __gen_address_type dst, 5747ec681f3Smrg uint32_t value, uint32_t size) 5757ec681f3Smrg{ 5767ec681f3Smrg#if GFX_VERx10 >= 75 5777ec681f3Smrg assert(b->num_math_dwords == 0); 5787ec681f3Smrg#endif 5797ec681f3Smrg 5807ec681f3Smrg /* This memset operates in units of dwords. */ 5817ec681f3Smrg assert(size % 4 == 0); 5827ec681f3Smrg 5837ec681f3Smrg for (uint32_t i = 0; i < size; i += 4) { 5847ec681f3Smrg mi_store(b, mi_mem32(__gen_address_offset(dst, i)), 5857ec681f3Smrg mi_imm(value)); 5867ec681f3Smrg } 5877ec681f3Smrg} 5887ec681f3Smrg 5897ec681f3Smrg/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */ 5907ec681f3Smrgstatic inline void 5917ec681f3Smrgmi_memcpy(struct mi_builder *b, __gen_address_type dst, 5927ec681f3Smrg __gen_address_type src, uint32_t size) 5937ec681f3Smrg{ 5947ec681f3Smrg#if GFX_VERx10 >= 75 5957ec681f3Smrg assert(b->num_math_dwords == 0); 5967ec681f3Smrg#endif 5977ec681f3Smrg 5987ec681f3Smrg /* This memcpy operates in units of dwords. */ 5997ec681f3Smrg assert(size % 4 == 0); 6007ec681f3Smrg 6017ec681f3Smrg for (uint32_t i = 0; i < size; i += 4) { 6027ec681f3Smrg struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i)); 6037ec681f3Smrg struct mi_value src_val = mi_mem32(__gen_address_offset(src, i)); 6047ec681f3Smrg#if GFX_VERx10 >= 75 6057ec681f3Smrg mi_store(b, dst_val, src_val); 6067ec681f3Smrg#else 6077ec681f3Smrg /* IVB does not have a general purpose register for command streamer 6087ec681f3Smrg * commands. Therefore, we use an alternate temporary register. 6097ec681f3Smrg */ 6107ec681f3Smrg struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */ 6117ec681f3Smrg mi_store(b, tmp_reg, src_val); 6127ec681f3Smrg mi_store(b, dst_val, tmp_reg); 6137ec681f3Smrg#endif 6147ec681f3Smrg } 6157ec681f3Smrg} 6167ec681f3Smrg 6177ec681f3Smrg/* 6187ec681f3Smrg * MI_MATH Section. Only available on Haswell+ 6197ec681f3Smrg */ 6207ec681f3Smrg 6217ec681f3Smrg#if GFX_VERx10 >= 75 6227ec681f3Smrg 6237ec681f3Smrg/** 6247ec681f3Smrg * Perform a predicated store (assuming the condition is already loaded 6257ec681f3Smrg * in the MI_PREDICATE_RESULT register) of the value in src to the memory 6267ec681f3Smrg * location specified by dst. Non-memory destinations are not supported. 6277ec681f3Smrg * 6287ec681f3Smrg * This function consumes one reference for each of src and dst. 6297ec681f3Smrg */ 6307ec681f3Smrgstatic inline void 6317ec681f3Smrgmi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src) 6327ec681f3Smrg{ 6337ec681f3Smrg assert(!dst.invert && !src.invert); 6347ec681f3Smrg 6357ec681f3Smrg mi_builder_flush_math(b); 6367ec681f3Smrg 6377ec681f3Smrg /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the 6387ec681f3Smrg * destination to be memory, and resolve the source to a temporary 6397ec681f3Smrg * register if it isn't in one already. 6407ec681f3Smrg */ 6417ec681f3Smrg assert(dst.type == MI_VALUE_TYPE_MEM64 || 6427ec681f3Smrg dst.type == MI_VALUE_TYPE_MEM32); 6437ec681f3Smrg 6447ec681f3Smrg if (src.type != MI_VALUE_TYPE_REG32 && 6457ec681f3Smrg src.type != MI_VALUE_TYPE_REG64) { 6467ec681f3Smrg struct mi_value tmp = mi_new_gpr(b); 6477ec681f3Smrg _mi_copy_no_unref(b, tmp, src); 6487ec681f3Smrg src = tmp; 6497ec681f3Smrg } 6507ec681f3Smrg 6517ec681f3Smrg if (dst.type == MI_VALUE_TYPE_MEM64) { 6527ec681f3Smrg mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { 6537ec681f3Smrg struct mi_reg_num reg = mi_adjust_reg_num(src.reg); 6547ec681f3Smrg srm.RegisterAddress = reg.num; 6557ec681f3Smrg#if GFX_VER >= 11 6567ec681f3Smrg srm.AddCSMMIOStartOffset = reg.cs; 6577ec681f3Smrg#endif 6587ec681f3Smrg srm.MemoryAddress = dst.addr; 6597ec681f3Smrg srm.PredicateEnable = true; 6607ec681f3Smrg } 6617ec681f3Smrg mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { 6627ec681f3Smrg struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4); 6637ec681f3Smrg srm.RegisterAddress = reg.num; 6647ec681f3Smrg#if GFX_VER >= 11 6657ec681f3Smrg srm.AddCSMMIOStartOffset = reg.cs; 6667ec681f3Smrg#endif 6677ec681f3Smrg srm.MemoryAddress = __gen_address_offset(dst.addr, 4); 6687ec681f3Smrg srm.PredicateEnable = true; 6697ec681f3Smrg } 6707ec681f3Smrg } else { 6717ec681f3Smrg mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { 6727ec681f3Smrg struct mi_reg_num reg = mi_adjust_reg_num(src.reg); 6737ec681f3Smrg srm.RegisterAddress = reg.num; 6747ec681f3Smrg#if GFX_VER >= 11 6757ec681f3Smrg srm.AddCSMMIOStartOffset = reg.cs; 6767ec681f3Smrg#endif 6777ec681f3Smrg srm.MemoryAddress = dst.addr; 6787ec681f3Smrg srm.PredicateEnable = true; 6797ec681f3Smrg } 6807ec681f3Smrg } 6817ec681f3Smrg 6827ec681f3Smrg mi_value_unref(b, src); 6837ec681f3Smrg mi_value_unref(b, dst); 6847ec681f3Smrg} 6857ec681f3Smrg 6867ec681f3Smrgstatic inline void 6877ec681f3Smrg_mi_builder_push_math(struct mi_builder *b, 6887ec681f3Smrg const uint32_t *dwords, 6897ec681f3Smrg unsigned num_dwords) 6907ec681f3Smrg{ 6917ec681f3Smrg assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS); 6927ec681f3Smrg if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS) 6937ec681f3Smrg mi_builder_flush_math(b); 6947ec681f3Smrg 6957ec681f3Smrg memcpy(&b->math_dwords[b->num_math_dwords], 6967ec681f3Smrg dwords, num_dwords * sizeof(*dwords)); 6977ec681f3Smrg b->num_math_dwords += num_dwords; 6987ec681f3Smrg} 6997ec681f3Smrg 7007ec681f3Smrgstatic inline uint32_t 7017ec681f3Smrg_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2) 7027ec681f3Smrg{ 7037ec681f3Smrg struct GENX(MI_MATH_ALU_INSTRUCTION) instr = { 7047ec681f3Smrg .Operand2 = operand2, 7057ec681f3Smrg .Operand1 = operand1, 7067ec681f3Smrg .ALUOpcode = opcode, 7077ec681f3Smrg }; 7087ec681f3Smrg 7097ec681f3Smrg uint32_t dw; 7107ec681f3Smrg GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr); 7117ec681f3Smrg 7127ec681f3Smrg return dw; 7137ec681f3Smrg} 7147ec681f3Smrg 7157ec681f3Smrgstatic inline struct mi_value 7167ec681f3Smrgmi_value_to_gpr(struct mi_builder *b, struct mi_value val) 7177ec681f3Smrg{ 7187ec681f3Smrg if (mi_value_is_gpr(val)) 7197ec681f3Smrg return val; 7207ec681f3Smrg 7217ec681f3Smrg /* Save off the invert flag because it makes copy() grumpy */ 7227ec681f3Smrg bool invert = val.invert; 7237ec681f3Smrg val.invert = false; 7247ec681f3Smrg 7257ec681f3Smrg struct mi_value tmp = mi_new_gpr(b); 7267ec681f3Smrg _mi_copy_no_unref(b, tmp, val); 7277ec681f3Smrg tmp.invert = invert; 7287ec681f3Smrg 7297ec681f3Smrg return tmp; 7307ec681f3Smrg} 7317ec681f3Smrg 7327ec681f3Smrgstatic inline uint64_t 7337ec681f3Smrgmi_value_to_u64(struct mi_value val) 7347ec681f3Smrg{ 7357ec681f3Smrg assert(val.type == MI_VALUE_TYPE_IMM); 7367ec681f3Smrg return val.invert ? ~val.imm : val.imm; 7377ec681f3Smrg} 7387ec681f3Smrg 7397ec681f3Smrgstatic inline uint32_t 7407ec681f3Smrg_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val) 7417ec681f3Smrg{ 7427ec681f3Smrg if (val->type == MI_VALUE_TYPE_IMM && 7437ec681f3Smrg (val->imm == 0 || val->imm == UINT64_MAX)) { 7447ec681f3Smrg uint64_t imm = val->invert ? ~val->imm : val->imm; 7457ec681f3Smrg return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0); 7467ec681f3Smrg } else { 7477ec681f3Smrg *val = mi_value_to_gpr(b, *val); 7487ec681f3Smrg return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD, 7497ec681f3Smrg src, _mi_value_as_gpr(*val)); 7507ec681f3Smrg } 7517ec681f3Smrg} 7527ec681f3Smrg 7537ec681f3Smrgstatic inline struct mi_value 7547ec681f3Smrgmi_math_binop(struct mi_builder *b, uint32_t opcode, 7557ec681f3Smrg struct mi_value src0, struct mi_value src1, 7567ec681f3Smrg uint32_t store_op, uint32_t store_src) 7577ec681f3Smrg{ 7587ec681f3Smrg struct mi_value dst = mi_new_gpr(b); 7597ec681f3Smrg 7607ec681f3Smrg uint32_t dw[4]; 7617ec681f3Smrg dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0); 7627ec681f3Smrg dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1); 7637ec681f3Smrg dw[2] = _mi_pack_alu(opcode, 0, 0); 7647ec681f3Smrg dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src); 7657ec681f3Smrg _mi_builder_push_math(b, dw, 4); 7667ec681f3Smrg 7677ec681f3Smrg mi_value_unref(b, src0); 7687ec681f3Smrg mi_value_unref(b, src1); 7697ec681f3Smrg 7707ec681f3Smrg return dst; 7717ec681f3Smrg} 7727ec681f3Smrg 7737ec681f3Smrgstatic inline struct mi_value 7747ec681f3Smrgmi_inot(struct mi_builder *b, struct mi_value val) 7757ec681f3Smrg{ 7767ec681f3Smrg if (val.type == MI_VALUE_TYPE_IMM) 7777ec681f3Smrg return mi_imm(~mi_value_to_u64(val)); 7787ec681f3Smrg 7797ec681f3Smrg val.invert = !val.invert; 7807ec681f3Smrg return val; 7817ec681f3Smrg} 7827ec681f3Smrg 7837ec681f3Smrgstatic inline struct mi_value 7847ec681f3Smrgmi_resolve_invert(struct mi_builder *b, struct mi_value src) 7857ec681f3Smrg{ 7867ec681f3Smrg if (!src.invert) 7877ec681f3Smrg return src; 7887ec681f3Smrg 7897ec681f3Smrg assert(src.type != MI_VALUE_TYPE_IMM); 7907ec681f3Smrg return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0), 7917ec681f3Smrg MI_ALU_STORE, MI_ALU_ACCU); 7927ec681f3Smrg} 7937ec681f3Smrg 7947ec681f3Smrgstatic inline struct mi_value 7957ec681f3Smrgmi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 7967ec681f3Smrg{ 7977ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 7987ec681f3Smrg return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1)); 7997ec681f3Smrg 8007ec681f3Smrg return mi_math_binop(b, MI_ALU_ADD, src0, src1, 8017ec681f3Smrg MI_ALU_STORE, MI_ALU_ACCU); 8027ec681f3Smrg} 8037ec681f3Smrg 8047ec681f3Smrgstatic inline struct mi_value 8057ec681f3Smrgmi_iadd_imm(struct mi_builder *b, 8067ec681f3Smrg struct mi_value src, uint64_t N) 8077ec681f3Smrg{ 8087ec681f3Smrg if (N == 0) 8097ec681f3Smrg return src; 8107ec681f3Smrg 8117ec681f3Smrg return mi_iadd(b, src, mi_imm(N)); 8127ec681f3Smrg} 8137ec681f3Smrg 8147ec681f3Smrgstatic inline struct mi_value 8157ec681f3Smrgmi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 8167ec681f3Smrg{ 8177ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 8187ec681f3Smrg return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1)); 8197ec681f3Smrg 8207ec681f3Smrg return mi_math_binop(b, MI_ALU_SUB, src0, src1, 8217ec681f3Smrg MI_ALU_STORE, MI_ALU_ACCU); 8227ec681f3Smrg} 8237ec681f3Smrg 8247ec681f3Smrgstatic inline struct mi_value 8257ec681f3Smrgmi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 8267ec681f3Smrg{ 8277ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 8287ec681f3Smrg return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0); 8297ec681f3Smrg 8307ec681f3Smrg /* Compute "equal" by subtracting and storing the zero bit */ 8317ec681f3Smrg return mi_math_binop(b, MI_ALU_SUB, src0, src1, 8327ec681f3Smrg MI_ALU_STORE, MI_ALU_ZF); 8337ec681f3Smrg} 8347ec681f3Smrg 8357ec681f3Smrgstatic inline struct mi_value 8367ec681f3Smrgmi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 8377ec681f3Smrg{ 8387ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 8397ec681f3Smrg return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0); 8407ec681f3Smrg 8417ec681f3Smrg /* Compute "not equal" by subtracting and storing the inverse zero bit */ 8427ec681f3Smrg return mi_math_binop(b, MI_ALU_SUB, src0, src1, 8437ec681f3Smrg MI_ALU_STOREINV, MI_ALU_ZF); 8447ec681f3Smrg} 8457ec681f3Smrg 8467ec681f3Smrgstatic inline struct mi_value 8477ec681f3Smrgmi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 8487ec681f3Smrg{ 8497ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 8507ec681f3Smrg return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0); 8517ec681f3Smrg 8527ec681f3Smrg /* Compute "less than" by subtracting and storing the carry bit */ 8537ec681f3Smrg return mi_math_binop(b, MI_ALU_SUB, src0, src1, 8547ec681f3Smrg MI_ALU_STORE, MI_ALU_CF); 8557ec681f3Smrg} 8567ec681f3Smrg 8577ec681f3Smrgstatic inline struct mi_value 8587ec681f3Smrgmi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 8597ec681f3Smrg{ 8607ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 8617ec681f3Smrg return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0); 8627ec681f3Smrg 8637ec681f3Smrg /* Compute "less than" by subtracting and storing the carry bit */ 8647ec681f3Smrg return mi_math_binop(b, MI_ALU_SUB, src0, src1, 8657ec681f3Smrg MI_ALU_STOREINV, MI_ALU_CF); 8667ec681f3Smrg} 8677ec681f3Smrg 8687ec681f3Smrgstatic inline struct mi_value 8697ec681f3Smrgmi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 8707ec681f3Smrg{ 8717ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 8727ec681f3Smrg return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1)); 8737ec681f3Smrg 8747ec681f3Smrg return mi_math_binop(b, MI_ALU_AND, src0, src1, 8757ec681f3Smrg MI_ALU_STORE, MI_ALU_ACCU); 8767ec681f3Smrg} 8777ec681f3Smrg 8787ec681f3Smrgstatic inline struct mi_value 8797ec681f3Smrgmi_nz(struct mi_builder *b, struct mi_value src) 8807ec681f3Smrg{ 8817ec681f3Smrg if (src.type == MI_VALUE_TYPE_IMM) 8827ec681f3Smrg return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0); 8837ec681f3Smrg 8847ec681f3Smrg return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0), 8857ec681f3Smrg MI_ALU_STOREINV, MI_ALU_ZF); 8867ec681f3Smrg} 8877ec681f3Smrg 8887ec681f3Smrgstatic inline struct mi_value 8897ec681f3Smrgmi_z(struct mi_builder *b, struct mi_value src) 8907ec681f3Smrg{ 8917ec681f3Smrg if (src.type == MI_VALUE_TYPE_IMM) 8927ec681f3Smrg return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0); 8937ec681f3Smrg 8947ec681f3Smrg return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0), 8957ec681f3Smrg MI_ALU_STORE, MI_ALU_ZF); 8967ec681f3Smrg} 8977ec681f3Smrg 8987ec681f3Smrgstatic inline struct mi_value 8997ec681f3Smrgmi_ior(struct mi_builder *b, 9007ec681f3Smrg struct mi_value src0, struct mi_value src1) 9017ec681f3Smrg{ 9027ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 9037ec681f3Smrg return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1)); 9047ec681f3Smrg 9057ec681f3Smrg return mi_math_binop(b, MI_ALU_OR, src0, src1, 9067ec681f3Smrg MI_ALU_STORE, MI_ALU_ACCU); 9077ec681f3Smrg} 9087ec681f3Smrg 9097ec681f3Smrg#if GFX_VERx10 >= 125 9107ec681f3Smrgstatic inline struct mi_value 9117ec681f3Smrgmi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 9127ec681f3Smrg{ 9137ec681f3Smrg if (src1.type == MI_VALUE_TYPE_IMM) { 9147ec681f3Smrg assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1))); 9157ec681f3Smrg assert(mi_value_to_u64(src1) <= 32); 9167ec681f3Smrg } 9177ec681f3Smrg 9187ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 9197ec681f3Smrg return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1)); 9207ec681f3Smrg 9217ec681f3Smrg return mi_math_binop(b, MI_ALU_SHL, src0, src1, 9227ec681f3Smrg MI_ALU_STORE, MI_ALU_ACCU); 9237ec681f3Smrg} 9247ec681f3Smrg 9257ec681f3Smrgstatic inline struct mi_value 9267ec681f3Smrgmi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 9277ec681f3Smrg{ 9287ec681f3Smrg if (src1.type == MI_VALUE_TYPE_IMM) { 9297ec681f3Smrg assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1))); 9307ec681f3Smrg assert(mi_value_to_u64(src1) <= 32); 9317ec681f3Smrg } 9327ec681f3Smrg 9337ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 9347ec681f3Smrg return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1)); 9357ec681f3Smrg 9367ec681f3Smrg return mi_math_binop(b, MI_ALU_SHR, src0, src1, 9377ec681f3Smrg MI_ALU_STORE, MI_ALU_ACCU); 9387ec681f3Smrg} 9397ec681f3Smrg 9407ec681f3Smrgstatic inline struct mi_value 9417ec681f3Smrgmi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift) 9427ec681f3Smrg{ 9437ec681f3Smrg if (shift == 0) 9447ec681f3Smrg return src; 9457ec681f3Smrg 9467ec681f3Smrg if (shift >= 64) 9477ec681f3Smrg return mi_imm(0); 9487ec681f3Smrg 9497ec681f3Smrg if (src.type == MI_VALUE_TYPE_IMM) 9507ec681f3Smrg return mi_imm(mi_value_to_u64(src) >> shift); 9517ec681f3Smrg 9527ec681f3Smrg struct mi_value res = mi_value_to_gpr(b, src); 9537ec681f3Smrg 9547ec681f3Smrg /* Annoyingly, we only have power-of-two shifts */ 9557ec681f3Smrg while (shift) { 9567ec681f3Smrg int bit = u_bit_scan(&shift); 9577ec681f3Smrg assert(bit <= 5); 9587ec681f3Smrg res = mi_ushr(b, res, mi_imm(1 << bit)); 9597ec681f3Smrg } 9607ec681f3Smrg 9617ec681f3Smrg return res; 9627ec681f3Smrg} 9637ec681f3Smrg 9647ec681f3Smrgstatic inline struct mi_value 9657ec681f3Smrgmi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 9667ec681f3Smrg{ 9677ec681f3Smrg if (src1.type == MI_VALUE_TYPE_IMM) { 9687ec681f3Smrg assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1))); 9697ec681f3Smrg assert(mi_value_to_u64(src1) <= 32); 9707ec681f3Smrg } 9717ec681f3Smrg 9727ec681f3Smrg if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 9737ec681f3Smrg return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1)); 9747ec681f3Smrg 9757ec681f3Smrg return mi_math_binop(b, MI_ALU_SAR, src0, src1, 9767ec681f3Smrg MI_ALU_STORE, MI_ALU_ACCU); 9777ec681f3Smrg} 9787ec681f3Smrg 9797ec681f3Smrgstatic inline struct mi_value 9807ec681f3Smrgmi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift) 9817ec681f3Smrg{ 9827ec681f3Smrg if (shift == 0) 9837ec681f3Smrg return src; 9847ec681f3Smrg 9857ec681f3Smrg if (shift >= 64) 9867ec681f3Smrg return mi_imm(0); 9877ec681f3Smrg 9887ec681f3Smrg if (src.type == MI_VALUE_TYPE_IMM) 9897ec681f3Smrg return mi_imm((int64_t)mi_value_to_u64(src) >> shift); 9907ec681f3Smrg 9917ec681f3Smrg struct mi_value res = mi_value_to_gpr(b, src); 9927ec681f3Smrg 9937ec681f3Smrg /* Annoyingly, we only have power-of-two shifts */ 9947ec681f3Smrg while (shift) { 9957ec681f3Smrg int bit = u_bit_scan(&shift); 9967ec681f3Smrg assert(bit <= 5); 9977ec681f3Smrg res = mi_ishr(b, res, mi_imm(1 << bit)); 9987ec681f3Smrg } 9997ec681f3Smrg 10007ec681f3Smrg return res; 10017ec681f3Smrg} 10027ec681f3Smrg#endif /* if GFX_VERx10 >= 125 */ 10037ec681f3Smrg 10047ec681f3Smrgstatic inline struct mi_value 10057ec681f3Smrgmi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N) 10067ec681f3Smrg{ 10077ec681f3Smrg if (src.type == MI_VALUE_TYPE_IMM) 10087ec681f3Smrg return mi_imm(mi_value_to_u64(src) * N); 10097ec681f3Smrg 10107ec681f3Smrg if (N == 0) { 10117ec681f3Smrg mi_value_unref(b, src); 10127ec681f3Smrg return mi_imm(0); 10137ec681f3Smrg } 10147ec681f3Smrg 10157ec681f3Smrg if (N == 1) 10167ec681f3Smrg return src; 10177ec681f3Smrg 10187ec681f3Smrg src = mi_value_to_gpr(b, src); 10197ec681f3Smrg 10207ec681f3Smrg struct mi_value res = mi_value_ref(b, src); 10217ec681f3Smrg 10227ec681f3Smrg unsigned top_bit = 31 - __builtin_clz(N); 10237ec681f3Smrg for (int i = top_bit - 1; i >= 0; i--) { 10247ec681f3Smrg res = mi_iadd(b, res, mi_value_ref(b, res)); 10257ec681f3Smrg if (N & (1 << i)) 10267ec681f3Smrg res = mi_iadd(b, res, mi_value_ref(b, src)); 10277ec681f3Smrg } 10287ec681f3Smrg 10297ec681f3Smrg mi_value_unref(b, src); 10307ec681f3Smrg 10317ec681f3Smrg return res; 10327ec681f3Smrg} 10337ec681f3Smrg 10347ec681f3Smrgstatic inline struct mi_value 10357ec681f3Smrgmi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift) 10367ec681f3Smrg{ 10377ec681f3Smrg if (shift == 0) 10387ec681f3Smrg return src; 10397ec681f3Smrg 10407ec681f3Smrg if (shift >= 64) 10417ec681f3Smrg return mi_imm(0); 10427ec681f3Smrg 10437ec681f3Smrg if (src.type == MI_VALUE_TYPE_IMM) 10447ec681f3Smrg return mi_imm(mi_value_to_u64(src) << shift); 10457ec681f3Smrg 10467ec681f3Smrg struct mi_value res = mi_value_to_gpr(b, src); 10477ec681f3Smrg 10487ec681f3Smrg#if GFX_VERx10 >= 125 10497ec681f3Smrg /* Annoyingly, we only have power-of-two shifts */ 10507ec681f3Smrg while (shift) { 10517ec681f3Smrg int bit = u_bit_scan(&shift); 10527ec681f3Smrg assert(bit <= 5); 10537ec681f3Smrg res = mi_ishl(b, res, mi_imm(1 << bit)); 10547ec681f3Smrg } 10557ec681f3Smrg#else 10567ec681f3Smrg for (unsigned i = 0; i < shift; i++) 10577ec681f3Smrg res = mi_iadd(b, res, mi_value_ref(b, res)); 10587ec681f3Smrg#endif 10597ec681f3Smrg 10607ec681f3Smrg return res; 10617ec681f3Smrg} 10627ec681f3Smrg 10637ec681f3Smrgstatic inline struct mi_value 10647ec681f3Smrgmi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift) 10657ec681f3Smrg{ 10667ec681f3Smrg if (shift == 0) 10677ec681f3Smrg return src; 10687ec681f3Smrg 10697ec681f3Smrg if (shift >= 64) 10707ec681f3Smrg return mi_imm(0); 10717ec681f3Smrg 10727ec681f3Smrg /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits 10737ec681f3Smrg * of the result. 10747ec681f3Smrg */ 10757ec681f3Smrg if (src.type == MI_VALUE_TYPE_IMM) 10767ec681f3Smrg return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX); 10777ec681f3Smrg 10787ec681f3Smrg if (shift > 32) { 10797ec681f3Smrg struct mi_value tmp = mi_new_gpr(b); 10807ec681f3Smrg _mi_copy_no_unref(b, mi_value_half(tmp, false), 10817ec681f3Smrg mi_value_half(src, true)); 10827ec681f3Smrg _mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0)); 10837ec681f3Smrg mi_value_unref(b, src); 10847ec681f3Smrg src = tmp; 10857ec681f3Smrg shift -= 32; 10867ec681f3Smrg } 10877ec681f3Smrg assert(shift <= 32); 10887ec681f3Smrg struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift); 10897ec681f3Smrg struct mi_value dst = mi_new_gpr(b); 10907ec681f3Smrg _mi_copy_no_unref(b, mi_value_half(dst, false), 10917ec681f3Smrg mi_value_half(tmp, true)); 10927ec681f3Smrg _mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0)); 10937ec681f3Smrg mi_value_unref(b, tmp); 10947ec681f3Smrg return dst; 10957ec681f3Smrg} 10967ec681f3Smrg 10977ec681f3Smrgstatic inline struct mi_value 10987ec681f3Smrgmi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D) 10997ec681f3Smrg{ 11007ec681f3Smrg if (N.type == MI_VALUE_TYPE_IMM) { 11017ec681f3Smrg assert(mi_value_to_u64(N) <= UINT32_MAX); 11027ec681f3Smrg return mi_imm(mi_value_to_u64(N) / D); 11037ec681f3Smrg } 11047ec681f3Smrg 11057ec681f3Smrg /* We implicitly assume that N is only a 32-bit value */ 11067ec681f3Smrg if (D == 0) { 11077ec681f3Smrg /* This is invalid but we should do something */ 11087ec681f3Smrg return mi_imm(0); 11097ec681f3Smrg } else if (util_is_power_of_two_or_zero(D)) { 11107ec681f3Smrg return mi_ushr32_imm(b, N, util_logbase2(D)); 11117ec681f3Smrg } else { 11127ec681f3Smrg struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32); 11137ec681f3Smrg assert(m.multiplier <= UINT32_MAX); 11147ec681f3Smrg 11157ec681f3Smrg if (m.pre_shift) 11167ec681f3Smrg N = mi_ushr32_imm(b, N, m.pre_shift); 11177ec681f3Smrg 11187ec681f3Smrg /* Do the 32x32 multiply into gpr0 */ 11197ec681f3Smrg N = mi_imul_imm(b, N, m.multiplier); 11207ec681f3Smrg 11217ec681f3Smrg if (m.increment) 11227ec681f3Smrg N = mi_iadd(b, N, mi_imm(m.multiplier)); 11237ec681f3Smrg 11247ec681f3Smrg N = mi_ushr32_imm(b, N, 32); 11257ec681f3Smrg 11267ec681f3Smrg if (m.post_shift) 11277ec681f3Smrg N = mi_ushr32_imm(b, N, m.post_shift); 11287ec681f3Smrg 11297ec681f3Smrg return N; 11307ec681f3Smrg } 11317ec681f3Smrg} 11327ec681f3Smrg 11337ec681f3Smrg#endif /* MI_MATH section */ 11347ec681f3Smrg 11357ec681f3Smrg/* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */ 11367ec681f3Smrg#if MI_BUILDER_CAN_WRITE_BATCH 11377ec681f3Smrg 11387ec681f3Smrgstruct mi_address_token { 11397ec681f3Smrg /* Pointers to address memory fields in the batch. */ 11407ec681f3Smrg uint64_t *ptrs[2]; 11417ec681f3Smrg}; 11427ec681f3Smrg 11437ec681f3Smrgstatic inline struct mi_address_token 11447ec681f3Smrgmi_store_address(struct mi_builder *b, struct mi_value addr_reg) 11457ec681f3Smrg{ 11467ec681f3Smrg mi_builder_flush_math(b); 11477ec681f3Smrg 11487ec681f3Smrg assert(addr_reg.type == MI_VALUE_TYPE_REG64); 11497ec681f3Smrg 11507ec681f3Smrg struct mi_address_token token = {}; 11517ec681f3Smrg 11527ec681f3Smrg for (unsigned i = 0; i < 2; i++) { 11537ec681f3Smrg mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { 11547ec681f3Smrg srm.RegisterAddress = addr_reg.reg + (i * 4); 11557ec681f3Smrg 11567ec681f3Smrg const unsigned addr_dw = 11577ec681f3Smrg GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8; 11587ec681f3Smrg token.ptrs[i] = (void *)_dst + addr_dw; 11597ec681f3Smrg } 11607ec681f3Smrg } 11617ec681f3Smrg 11627ec681f3Smrg mi_value_unref(b, addr_reg); 11637ec681f3Smrg return token; 11647ec681f3Smrg} 11657ec681f3Smrg 11667ec681f3Smrgstatic inline void 11677ec681f3Smrgmi_self_mod_barrier(struct mi_builder *b) 11687ec681f3Smrg{ 11697ec681f3Smrg /* First make sure all the memory writes from previous modifying commands 11707ec681f3Smrg * have landed. We want to do this before going through the CS cache, 11717ec681f3Smrg * otherwise we could be fetching memory that hasn't been written to yet. 11727ec681f3Smrg */ 11737ec681f3Smrg mi_builder_emit(b, GENX(PIPE_CONTROL), pc) { 11747ec681f3Smrg pc.CommandStreamerStallEnable = true; 11757ec681f3Smrg } 11767ec681f3Smrg /* Documentation says Gfx11+ should be able to invalidate the command cache 11777ec681f3Smrg * but experiment show it doesn't work properly, so for now just get over 11787ec681f3Smrg * the CS prefetch. 11797ec681f3Smrg */ 11807ec681f3Smrg for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++) 11817ec681f3Smrg mi_builder_emit(b, GENX(MI_NOOP), noop); 11827ec681f3Smrg} 11837ec681f3Smrg 11847ec681f3Smrgstatic inline void 11857ec681f3Smrg_mi_resolve_address_token(struct mi_builder *b, 11867ec681f3Smrg struct mi_address_token token, 11877ec681f3Smrg void *batch_location) 11887ec681f3Smrg{ 11897ec681f3Smrg __gen_address_type addr = __gen_get_batch_address(b->user_data, 11907ec681f3Smrg batch_location); 11917ec681f3Smrg uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location, 11927ec681f3Smrg addr, 0); 11937ec681f3Smrg *(token.ptrs[0]) = addr_addr_u64; 11947ec681f3Smrg *(token.ptrs[1]) = addr_addr_u64 + 4; 11957ec681f3Smrg} 11967ec681f3Smrg 11977ec681f3Smrg#endif /* MI_BUILDER_CAN_WRITE_BATCH */ 11987ec681f3Smrg 11997ec681f3Smrg#if GFX_VERx10 >= 125 12007ec681f3Smrg 12017ec681f3Smrg/* 12027ec681f3Smrg * Indirect load/store. Only available on XE_HP+ 12037ec681f3Smrg */ 12047ec681f3Smrg 12057ec681f3SmrgMUST_CHECK static inline struct mi_value 12067ec681f3Smrgmi_load_mem64_offset(struct mi_builder *b, 12077ec681f3Smrg __gen_address_type addr, struct mi_value offset) 12087ec681f3Smrg{ 12097ec681f3Smrg uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0); 12107ec681f3Smrg struct mi_value addr_val = mi_imm(addr_u64); 12117ec681f3Smrg 12127ec681f3Smrg struct mi_value dst = mi_new_gpr(b); 12137ec681f3Smrg 12147ec681f3Smrg uint32_t dw[5]; 12157ec681f3Smrg dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val); 12167ec681f3Smrg dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset); 12177ec681f3Smrg dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0); 12187ec681f3Smrg dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU); 12197ec681f3Smrg dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0); 12207ec681f3Smrg _mi_builder_push_math(b, dw, 5); 12217ec681f3Smrg 12227ec681f3Smrg mi_value_unref(b, addr_val); 12237ec681f3Smrg mi_value_unref(b, offset); 12247ec681f3Smrg 12257ec681f3Smrg return dst; 12267ec681f3Smrg} 12277ec681f3Smrg 12287ec681f3Smrgstatic inline void 12297ec681f3Smrgmi_store_mem64_offset(struct mi_builder *b, 12307ec681f3Smrg __gen_address_type addr, struct mi_value offset, 12317ec681f3Smrg struct mi_value data) 12327ec681f3Smrg{ 12337ec681f3Smrg uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0); 12347ec681f3Smrg struct mi_value addr_val = mi_imm(addr_u64); 12357ec681f3Smrg 12367ec681f3Smrg data = mi_value_to_gpr(b, mi_resolve_invert(b, data)); 12377ec681f3Smrg 12387ec681f3Smrg uint32_t dw[5]; 12397ec681f3Smrg dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val); 12407ec681f3Smrg dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset); 12417ec681f3Smrg dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0); 12427ec681f3Smrg dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data)); 12437ec681f3Smrg dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0); 12447ec681f3Smrg _mi_builder_push_math(b, dw, 5); 12457ec681f3Smrg 12467ec681f3Smrg mi_value_unref(b, addr_val); 12477ec681f3Smrg mi_value_unref(b, offset); 12487ec681f3Smrg mi_value_unref(b, data); 12497ec681f3Smrg 12507ec681f3Smrg /* This is the only math case which has side-effects outside of regular 12517ec681f3Smrg * registers to flush math afterwards so we don't confuse anyone. 12527ec681f3Smrg */ 12537ec681f3Smrg mi_builder_flush_math(b); 12547ec681f3Smrg} 12557ec681f3Smrg 12567ec681f3Smrg/* 12577ec681f3Smrg * Control-flow Section. Only available on XE_HP+ 12587ec681f3Smrg */ 12597ec681f3Smrg 12607ec681f3Smrgstruct _mi_goto { 12617ec681f3Smrg bool predicated; 12627ec681f3Smrg void *mi_bbs; 12637ec681f3Smrg}; 12647ec681f3Smrg 12657ec681f3Smrgstruct mi_goto_target { 12667ec681f3Smrg bool placed; 12677ec681f3Smrg unsigned num_gotos; 12687ec681f3Smrg struct _mi_goto gotos[8]; 12697ec681f3Smrg __gen_address_type addr; 12707ec681f3Smrg}; 12717ec681f3Smrg 12727ec681f3Smrg#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {}) 12737ec681f3Smrg 12747ec681f3Smrg#define MI_BUILDER_MI_PREDICATE_RESULT_num 0x2418 12757ec681f3Smrg 12767ec681f3Smrgstatic inline void 12777ec681f3Smrgmi_goto_if(struct mi_builder *b, struct mi_value cond, 12787ec681f3Smrg struct mi_goto_target *t) 12797ec681f3Smrg{ 12807ec681f3Smrg /* First, set up the predicate, if any */ 12817ec681f3Smrg bool predicated; 12827ec681f3Smrg if (cond.type == MI_VALUE_TYPE_IMM) { 12837ec681f3Smrg /* If it's an immediate, the goto either doesn't happen or happens 12847ec681f3Smrg * unconditionally. 12857ec681f3Smrg */ 12867ec681f3Smrg if (mi_value_to_u64(cond) == 0) 12877ec681f3Smrg return; 12887ec681f3Smrg 12897ec681f3Smrg assert(mi_value_to_u64(cond) == ~0ull); 12907ec681f3Smrg predicated = false; 12917ec681f3Smrg } else if (mi_value_is_reg(cond) && 12927ec681f3Smrg cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) { 12937ec681f3Smrg /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client 12947ec681f3Smrg * provided us with 12957ec681f3Smrg */ 12967ec681f3Smrg assert(cond.type == MI_VALUE_TYPE_REG32); 12977ec681f3Smrg predicated = true; 12987ec681f3Smrg } else { 12997ec681f3Smrg mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond); 13007ec681f3Smrg predicated = true; 13017ec681f3Smrg } 13027ec681f3Smrg 13037ec681f3Smrg if (predicated) { 13047ec681f3Smrg mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) { 13057ec681f3Smrg sp.PredicateEnable = NOOPOnResultClear; 13067ec681f3Smrg } 13077ec681f3Smrg } 13087ec681f3Smrg if (t->placed) { 13097ec681f3Smrg mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) { 13107ec681f3Smrg bbs.PredicationEnable = predicated; 13117ec681f3Smrg bbs.AddressSpaceIndicator = ASI_PPGTT; 13127ec681f3Smrg bbs.BatchBufferStartAddress = t->addr; 13137ec681f3Smrg } 13147ec681f3Smrg } else { 13157ec681f3Smrg assert(t->num_gotos < ARRAY_SIZE(t->gotos)); 13167ec681f3Smrg struct _mi_goto g = { 13177ec681f3Smrg .predicated = predicated, 13187ec681f3Smrg .mi_bbs = __gen_get_batch_dwords(b->user_data, 13197ec681f3Smrg GENX(MI_BATCH_BUFFER_START_length)), 13207ec681f3Smrg }; 13217ec681f3Smrg memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length)); 13227ec681f3Smrg t->gotos[t->num_gotos++] = g; 13237ec681f3Smrg } 13247ec681f3Smrg if (predicated) { 13257ec681f3Smrg mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) { 13267ec681f3Smrg sp.PredicateEnable = NOOPNever; 13277ec681f3Smrg } 13287ec681f3Smrg } 13297ec681f3Smrg} 13307ec681f3Smrg 13317ec681f3Smrgstatic inline void 13327ec681f3Smrgmi_goto(struct mi_builder *b, struct mi_goto_target *t) 13337ec681f3Smrg{ 13347ec681f3Smrg mi_goto_if(b, mi_imm(-1), t); 13357ec681f3Smrg} 13367ec681f3Smrg 13377ec681f3Smrgstatic inline void 13387ec681f3Smrgmi_goto_target(struct mi_builder *b, struct mi_goto_target *t) 13397ec681f3Smrg{ 13407ec681f3Smrg mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) { 13417ec681f3Smrg sp.PredicateEnable = NOOPNever; 13427ec681f3Smrg t->addr = __gen_get_batch_address(b->user_data, _dst); 13437ec681f3Smrg } 13447ec681f3Smrg t->placed = true; 13457ec681f3Smrg 13467ec681f3Smrg struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) }; 13477ec681f3Smrg bbs.AddressSpaceIndicator = ASI_PPGTT; 13487ec681f3Smrg bbs.BatchBufferStartAddress = t->addr; 13497ec681f3Smrg 13507ec681f3Smrg for (unsigned i = 0; i < t->num_gotos; i++) { 13517ec681f3Smrg bbs.PredicationEnable = t->gotos[i].predicated; 13527ec681f3Smrg GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs); 13537ec681f3Smrg } 13547ec681f3Smrg} 13557ec681f3Smrg 13567ec681f3Smrgstatic inline struct mi_goto_target 13577ec681f3Smrgmi_goto_target_init_and_place(struct mi_builder *b) 13587ec681f3Smrg{ 13597ec681f3Smrg struct mi_goto_target t = MI_GOTO_TARGET_INIT; 13607ec681f3Smrg mi_goto_target(b, &t); 13617ec681f3Smrg return t; 13627ec681f3Smrg} 13637ec681f3Smrg 13647ec681f3Smrg#define mi_loop(b) \ 13657ec681f3Smrg for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \ 13667ec681f3Smrg __continue = mi_goto_target_init_and_place(b); !__break.placed; \ 13677ec681f3Smrg mi_goto(b, &__continue), mi_goto_target(b, &__break)) 13687ec681f3Smrg 13697ec681f3Smrg#define mi_break(b) mi_goto(b, &__break) 13707ec681f3Smrg#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break) 13717ec681f3Smrg#define mi_continue(b) mi_goto(b, &__continue) 13727ec681f3Smrg#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue) 13737ec681f3Smrg 13747ec681f3Smrg#endif /* GFX_VERx10 >= 125 */ 13757ec681f3Smrg 13767ec681f3Smrg#endif /* MI_BUILDER_H */ 1377