17e102996Smaya/* 27e102996Smaya * Copyright (C) 2017-2018 Rob Clark <robclark@freedesktop.org> 37e102996Smaya * 47e102996Smaya * Permission is hereby granted, free of charge, to any person obtaining a 57e102996Smaya * copy of this software and associated documentation files (the "Software"), 67e102996Smaya * to deal in the Software without restriction, including without limitation 77e102996Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87e102996Smaya * and/or sell copies of the Software, and to permit persons to whom the 97e102996Smaya * Software is furnished to do so, subject to the following conditions: 107e102996Smaya * 117e102996Smaya * The above copyright notice and this permission notice (including the next 127e102996Smaya * paragraph) shall be included in all copies or substantial portions of the 137e102996Smaya * Software. 147e102996Smaya * 157e102996Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167e102996Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177e102996Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187e102996Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197e102996Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207e102996Smaya * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217e102996Smaya * SOFTWARE. 227e102996Smaya * 237e102996Smaya * Authors: 247e102996Smaya * Rob Clark <robclark@freedesktop.org> 257e102996Smaya */ 267e102996Smaya 277e102996Smaya#define GPU 600 287e102996Smaya 297e102996Smaya#include "ir3_context.h" 307e102996Smaya#include "ir3_image.h" 317e102996Smaya 327e102996Smaya/* 337e102996Smaya * Handlers for instructions changed/added in a6xx: 347e102996Smaya * 357e102996Smaya * Starting with a6xx, isam and stbi is used for SSBOs as well; stbi and the 367e102996Smaya * atomic instructions (used for both SSBO and image) use a new instruction 377e102996Smaya * encoding compared to a4xx/a5xx. 387e102996Smaya */ 397e102996Smaya 407e102996Smaya/* src[] = { buffer_index, offset }. No const_index */ 417e102996Smayastatic void 427e102996Smayaemit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, 437ec681f3Smrg struct ir3_instruction **dst) 447e102996Smaya{ 457ec681f3Smrg struct ir3_block *b = ctx->block; 467ec681f3Smrg struct ir3_instruction *offset; 477ec681f3Smrg struct ir3_instruction *ldib; 487ec681f3Smrg 497ec681f3Smrg offset = ir3_get_src(ctx, &intr->src[2])[0]; 507ec681f3Smrg 517ec681f3Smrg ldib = ir3_LDIB(b, ir3_ssbo_to_ibo(ctx, intr->src[0]), 0, offset, 0); 527ec681f3Smrg ldib->dsts[0]->wrmask = MASK(intr->num_components); 537ec681f3Smrg ldib->cat6.iim_val = intr->num_components; 547ec681f3Smrg ldib->cat6.d = 1; 557ec681f3Smrg ldib->cat6.type = intr->dest.ssa.bit_size == 16 ? TYPE_U16 : TYPE_U32; 567ec681f3Smrg ldib->barrier_class = IR3_BARRIER_BUFFER_R; 577ec681f3Smrg ldib->barrier_conflict = IR3_BARRIER_BUFFER_W; 587ec681f3Smrg ir3_handle_bindless_cat6(ldib, intr->src[0]); 597ec681f3Smrg ir3_handle_nonuniform(ldib, intr); 607ec681f3Smrg 617ec681f3Smrg ir3_split_dest(b, dst, ldib, 0, intr->num_components); 627e102996Smaya} 637e102996Smaya 647e102996Smaya/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ 657e102996Smayastatic void 667e102996Smayaemit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) 677e102996Smaya{ 687ec681f3Smrg struct ir3_block *b = ctx->block; 697ec681f3Smrg struct ir3_instruction *stib, *val, *offset; 707ec681f3Smrg unsigned wrmask = nir_intrinsic_write_mask(intr); 717ec681f3Smrg unsigned ncomp = ffs(~wrmask) - 1; 727ec681f3Smrg 737ec681f3Smrg assert(wrmask == BITFIELD_MASK(intr->num_components)); 747ec681f3Smrg 757ec681f3Smrg /* src0 is offset, src1 is value: 767ec681f3Smrg */ 777ec681f3Smrg val = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp); 787ec681f3Smrg offset = ir3_get_src(ctx, &intr->src[3])[0]; 797ec681f3Smrg 807ec681f3Smrg stib = ir3_STIB(b, ir3_ssbo_to_ibo(ctx, intr->src[1]), 0, offset, 0, val, 0); 817ec681f3Smrg stib->cat6.iim_val = ncomp; 827ec681f3Smrg stib->cat6.d = 1; 837ec681f3Smrg stib->cat6.type = intr->src[0].ssa->bit_size == 16 ? TYPE_U16 : TYPE_U32; 847ec681f3Smrg stib->barrier_class = IR3_BARRIER_BUFFER_W; 857ec681f3Smrg stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; 867ec681f3Smrg ir3_handle_bindless_cat6(stib, intr->src[1]); 877ec681f3Smrg ir3_handle_nonuniform(stib, intr); 887ec681f3Smrg 897ec681f3Smrg array_insert(b, b->keeps, stib); 907e102996Smaya} 917e102996Smaya 927e102996Smaya/* 937e102996Smaya * SSBO atomic intrinsics 947e102996Smaya * 957e102996Smaya * All of the SSBO atomic memory operations read a value from memory, 967e102996Smaya * compute a new value using one of the operations below, write the new 977e102996Smaya * value to memory, and return the original value read. 987e102996Smaya * 997e102996Smaya * All operations take 3 sources except CompSwap that takes 4. These 1007e102996Smaya * sources represent: 1017e102996Smaya * 1027e102996Smaya * 0: The SSBO buffer index. 1037e102996Smaya * 1: The offset into the SSBO buffer of the variable that the atomic 1047e102996Smaya * operation will operate on. 1057e102996Smaya * 2: The data parameter to the atomic function (i.e. the value to add 1067e102996Smaya * in ssbo_atomic_add, etc). 1077e102996Smaya * 3: For CompSwap only: the second data parameter. 1087e102996Smaya */ 1097e102996Smayastatic struct ir3_instruction * 1107e102996Smayaemit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) 1117e102996Smaya{ 1127ec681f3Smrg struct ir3_block *b = ctx->block; 1137ec681f3Smrg struct ir3_instruction *atomic, *ibo, *src0, *src1, *data, *dummy; 1147ec681f3Smrg type_t type = TYPE_U32; 1157ec681f3Smrg 1167ec681f3Smrg ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]); 1177ec681f3Smrg 1187ec681f3Smrg data = ir3_get_src(ctx, &intr->src[2])[0]; 1197ec681f3Smrg 1207ec681f3Smrg /* So this gets a bit creative: 1217ec681f3Smrg * 1227ec681f3Smrg * src0 - vecN offset/coords 1237ec681f3Smrg * src1.x - is actually destination register 1247ec681f3Smrg * src1.y - is 'data' except for cmpxchg where src2.y is 'compare' 1257ec681f3Smrg * src1.z - is 'data' for cmpxchg 1267ec681f3Smrg * 1277ec681f3Smrg * The combining src and dest kinda doesn't work out so well with how 1287ec681f3Smrg * scheduling and RA work. So we create a dummy src2 which is tied to the 1297ec681f3Smrg * destination in RA (i.e. must be allocated to the same vec2/vec3 1307ec681f3Smrg * register) and then immediately extract the first component. 1317ec681f3Smrg * 1327ec681f3Smrg * Note that nir already multiplies the offset by four 1337ec681f3Smrg */ 1347ec681f3Smrg dummy = create_immed(b, 0); 1357ec681f3Smrg 1367ec681f3Smrg if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap_ir3) { 1377ec681f3Smrg src0 = ir3_get_src(ctx, &intr->src[4])[0]; 1387ec681f3Smrg struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[3])[0]; 1397ec681f3Smrg src1 = ir3_collect(b, dummy, compare, data); 1407ec681f3Smrg } else { 1417ec681f3Smrg src0 = ir3_get_src(ctx, &intr->src[3])[0]; 1427ec681f3Smrg src1 = ir3_collect(b, dummy, data); 1437ec681f3Smrg } 1447ec681f3Smrg 1457ec681f3Smrg switch (intr->intrinsic) { 1467ec681f3Smrg case nir_intrinsic_ssbo_atomic_add_ir3: 1477ec681f3Smrg atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0); 1487ec681f3Smrg break; 1497ec681f3Smrg case nir_intrinsic_ssbo_atomic_imin_ir3: 1507ec681f3Smrg atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0); 1517ec681f3Smrg type = TYPE_S32; 1527ec681f3Smrg break; 1537ec681f3Smrg case nir_intrinsic_ssbo_atomic_umin_ir3: 1547ec681f3Smrg atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0); 1557ec681f3Smrg break; 1567ec681f3Smrg case nir_intrinsic_ssbo_atomic_imax_ir3: 1577ec681f3Smrg atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0); 1587ec681f3Smrg type = TYPE_S32; 1597ec681f3Smrg break; 1607ec681f3Smrg case nir_intrinsic_ssbo_atomic_umax_ir3: 1617ec681f3Smrg atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0); 1627ec681f3Smrg break; 1637ec681f3Smrg case nir_intrinsic_ssbo_atomic_and_ir3: 1647ec681f3Smrg atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0); 1657ec681f3Smrg break; 1667ec681f3Smrg case nir_intrinsic_ssbo_atomic_or_ir3: 1677ec681f3Smrg atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0); 1687ec681f3Smrg break; 1697ec681f3Smrg case nir_intrinsic_ssbo_atomic_xor_ir3: 1707ec681f3Smrg atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0); 1717ec681f3Smrg break; 1727ec681f3Smrg case nir_intrinsic_ssbo_atomic_exchange_ir3: 1737ec681f3Smrg atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0); 1747ec681f3Smrg break; 1757ec681f3Smrg case nir_intrinsic_ssbo_atomic_comp_swap_ir3: 1767ec681f3Smrg atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0); 1777ec681f3Smrg break; 1787ec681f3Smrg default: 1797ec681f3Smrg unreachable("boo"); 1807ec681f3Smrg } 1817ec681f3Smrg 1827ec681f3Smrg atomic->cat6.iim_val = 1; 1837ec681f3Smrg atomic->cat6.d = 1; 1847ec681f3Smrg atomic->cat6.type = type; 1857ec681f3Smrg atomic->barrier_class = IR3_BARRIER_BUFFER_W; 1867ec681f3Smrg atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; 1877ec681f3Smrg ir3_handle_bindless_cat6(atomic, intr->src[0]); 1887ec681f3Smrg 1897ec681f3Smrg /* even if nothing consume the result, we can't DCE the instruction: */ 1907ec681f3Smrg array_insert(b, b->keeps, atomic); 1917ec681f3Smrg 1927ec681f3Smrg atomic->dsts[0]->wrmask = src1->dsts[0]->wrmask; 1937ec681f3Smrg ir3_reg_tie(atomic->dsts[0], atomic->srcs[2]); 1947ec681f3Smrg struct ir3_instruction *split; 1957ec681f3Smrg ir3_split_dest(b, &split, atomic, 0, 1); 1967ec681f3Smrg return split; 1977ec681f3Smrg} 1987ec681f3Smrg 1997ec681f3Smrg/* src[] = { deref, coord, sample_index }. const_index[] = {} */ 2007ec681f3Smrgstatic void 2017ec681f3Smrgemit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, 2027ec681f3Smrg struct ir3_instruction **dst) 2037ec681f3Smrg{ 2047ec681f3Smrg struct ir3_block *b = ctx->block; 2057ec681f3Smrg struct ir3_instruction *ldib; 2067ec681f3Smrg struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]); 2077ec681f3Smrg unsigned ncoords = ir3_get_image_coords(intr, NULL); 2087ec681f3Smrg 2097ec681f3Smrg ldib = ir3_LDIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0, 2107ec681f3Smrg ir3_create_collect(b, coords, ncoords), 0); 2117ec681f3Smrg ldib->dsts[0]->wrmask = MASK(intr->num_components); 2127ec681f3Smrg ldib->cat6.iim_val = intr->num_components; 2137ec681f3Smrg ldib->cat6.d = ncoords; 2147ec681f3Smrg ldib->cat6.type = ir3_get_type_for_image_intrinsic(intr); 2157ec681f3Smrg ldib->cat6.typed = true; 2167ec681f3Smrg ldib->barrier_class = IR3_BARRIER_IMAGE_R; 2177ec681f3Smrg ldib->barrier_conflict = IR3_BARRIER_IMAGE_W; 2187ec681f3Smrg ir3_handle_bindless_cat6(ldib, intr->src[0]); 2197ec681f3Smrg ir3_handle_nonuniform(ldib, intr); 2207ec681f3Smrg 2217ec681f3Smrg ir3_split_dest(b, dst, ldib, 0, intr->num_components); 2227e102996Smaya} 2237e102996Smaya 2247e102996Smaya/* src[] = { deref, coord, sample_index, value }. const_index[] = {} */ 2257e102996Smayastatic void 2267e102996Smayaemit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) 2277e102996Smaya{ 2287ec681f3Smrg struct ir3_block *b = ctx->block; 2297ec681f3Smrg struct ir3_instruction *stib; 2307ec681f3Smrg struct ir3_instruction *const *value = ir3_get_src(ctx, &intr->src[3]); 2317ec681f3Smrg struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]); 2327ec681f3Smrg unsigned ncoords = ir3_get_image_coords(intr, NULL); 2337ec681f3Smrg enum pipe_format format = nir_intrinsic_format(intr); 2347ec681f3Smrg unsigned ncomp = ir3_get_num_components_for_image_format(format); 2357ec681f3Smrg 2367ec681f3Smrg /* src0 is offset, src1 is value: 2377ec681f3Smrg */ 2387ec681f3Smrg stib = ir3_STIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0, 2397ec681f3Smrg ir3_create_collect(b, coords, ncoords), 0, 2407ec681f3Smrg ir3_create_collect(b, value, ncomp), 0); 2417ec681f3Smrg stib->cat6.iim_val = ncomp; 2427ec681f3Smrg stib->cat6.d = ncoords; 2437ec681f3Smrg stib->cat6.type = ir3_get_type_for_image_intrinsic(intr); 2447ec681f3Smrg stib->cat6.typed = true; 2457ec681f3Smrg stib->barrier_class = IR3_BARRIER_IMAGE_W; 2467ec681f3Smrg stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; 2477ec681f3Smrg ir3_handle_bindless_cat6(stib, intr->src[0]); 2487ec681f3Smrg ir3_handle_nonuniform(stib, intr); 2497ec681f3Smrg 2507ec681f3Smrg array_insert(b, b->keeps, stib); 2517e102996Smaya} 2527e102996Smaya 2537e102996Smaya/* src[] = { deref, coord, sample_index, value, compare }. const_index[] = {} */ 2547e102996Smayastatic struct ir3_instruction * 2557e102996Smayaemit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) 2567e102996Smaya{ 2577ec681f3Smrg struct ir3_block *b = ctx->block; 2587ec681f3Smrg struct ir3_instruction *atomic, *ibo, *src0, *src1, *dummy; 2597ec681f3Smrg struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]); 2607ec681f3Smrg struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0]; 2617ec681f3Smrg unsigned ncoords = ir3_get_image_coords(intr, NULL); 2627ec681f3Smrg 2637ec681f3Smrg ibo = ir3_image_to_ibo(ctx, intr->src[0]); 2647ec681f3Smrg 2657ec681f3Smrg /* So this gets a bit creative: 2667ec681f3Smrg * 2677ec681f3Smrg * src0 - vecN offset/coords 2687ec681f3Smrg * src1.x - is actually destination register 2697ec681f3Smrg * src1.y - is 'value' except for cmpxchg where src2.y is 'compare' 2707ec681f3Smrg * src1.z - is 'value' for cmpxchg 2717ec681f3Smrg * 2727ec681f3Smrg * The combining src and dest kinda doesn't work out so well with how 2737ec681f3Smrg * scheduling and RA work. So we create a dummy src2 which is tied to the 2747ec681f3Smrg * destination in RA (i.e. must be allocated to the same vec2/vec3 2757ec681f3Smrg * register) and then immediately extract the first component. 2767ec681f3Smrg */ 2777ec681f3Smrg dummy = create_immed(b, 0); 2787ec681f3Smrg src0 = ir3_create_collect(b, coords, ncoords); 2797ec681f3Smrg 2807ec681f3Smrg if (intr->intrinsic == nir_intrinsic_image_atomic_comp_swap || 2817ec681f3Smrg intr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap) { 2827ec681f3Smrg struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[4])[0]; 2837ec681f3Smrg src1 = ir3_collect(b, dummy, compare, value); 2847ec681f3Smrg } else { 2857ec681f3Smrg src1 = ir3_collect(b, dummy, value); 2867ec681f3Smrg } 2877ec681f3Smrg 2887ec681f3Smrg switch (intr->intrinsic) { 2897ec681f3Smrg case nir_intrinsic_image_atomic_add: 2907ec681f3Smrg case nir_intrinsic_bindless_image_atomic_add: 2917ec681f3Smrg atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0); 2927ec681f3Smrg break; 2937ec681f3Smrg case nir_intrinsic_image_atomic_imin: 2947ec681f3Smrg case nir_intrinsic_image_atomic_umin: 2957ec681f3Smrg case nir_intrinsic_bindless_image_atomic_imin: 2967ec681f3Smrg case nir_intrinsic_bindless_image_atomic_umin: 2977ec681f3Smrg atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0); 2987ec681f3Smrg break; 2997ec681f3Smrg case nir_intrinsic_image_atomic_imax: 3007ec681f3Smrg case nir_intrinsic_image_atomic_umax: 3017ec681f3Smrg case nir_intrinsic_bindless_image_atomic_imax: 3027ec681f3Smrg case nir_intrinsic_bindless_image_atomic_umax: 3037ec681f3Smrg atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0); 3047ec681f3Smrg break; 3057ec681f3Smrg case nir_intrinsic_image_atomic_and: 3067ec681f3Smrg case nir_intrinsic_bindless_image_atomic_and: 3077ec681f3Smrg atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0); 3087ec681f3Smrg break; 3097ec681f3Smrg case nir_intrinsic_image_atomic_or: 3107ec681f3Smrg case nir_intrinsic_bindless_image_atomic_or: 3117ec681f3Smrg atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0); 3127ec681f3Smrg break; 3137ec681f3Smrg case nir_intrinsic_image_atomic_xor: 3147ec681f3Smrg case nir_intrinsic_bindless_image_atomic_xor: 3157ec681f3Smrg atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0); 3167ec681f3Smrg break; 3177ec681f3Smrg case nir_intrinsic_image_atomic_exchange: 3187ec681f3Smrg case nir_intrinsic_bindless_image_atomic_exchange: 3197ec681f3Smrg atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0); 3207ec681f3Smrg break; 3217ec681f3Smrg case nir_intrinsic_image_atomic_comp_swap: 3227ec681f3Smrg case nir_intrinsic_bindless_image_atomic_comp_swap: 3237ec681f3Smrg atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0); 3247ec681f3Smrg break; 3257ec681f3Smrg default: 3267ec681f3Smrg unreachable("boo"); 3277ec681f3Smrg } 3287ec681f3Smrg 3297ec681f3Smrg atomic->cat6.iim_val = 1; 3307ec681f3Smrg atomic->cat6.d = ncoords; 3317ec681f3Smrg atomic->cat6.type = ir3_get_type_for_image_intrinsic(intr); 3327ec681f3Smrg atomic->cat6.typed = true; 3337ec681f3Smrg atomic->barrier_class = IR3_BARRIER_IMAGE_W; 3347ec681f3Smrg atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; 3357ec681f3Smrg ir3_handle_bindless_cat6(atomic, intr->src[0]); 3367ec681f3Smrg 3377ec681f3Smrg /* even if nothing consume the result, we can't DCE the instruction: */ 3387ec681f3Smrg array_insert(b, b->keeps, atomic); 3397ec681f3Smrg 3407ec681f3Smrg atomic->dsts[0]->wrmask = src1->dsts[0]->wrmask; 3417ec681f3Smrg ir3_reg_tie(atomic->dsts[0], atomic->srcs[2]); 3427ec681f3Smrg struct ir3_instruction *split; 3437ec681f3Smrg ir3_split_dest(b, &split, atomic, 0, 1); 3447ec681f3Smrg return split; 3457e102996Smaya} 3467e102996Smaya 3477ec681f3Smrgstatic void 3487ec681f3Smrgemit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, 3497ec681f3Smrg struct ir3_instruction **dst) 3507ec681f3Smrg{ 3517ec681f3Smrg struct ir3_block *b = ctx->block; 3527ec681f3Smrg struct ir3_instruction *ibo = ir3_image_to_ibo(ctx, intr->src[0]); 3537ec681f3Smrg struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0); 3547ec681f3Smrg resinfo->cat6.iim_val = 1; 3557ec681f3Smrg resinfo->cat6.d = intr->num_components; 3567ec681f3Smrg resinfo->cat6.type = TYPE_U32; 3577ec681f3Smrg resinfo->cat6.typed = false; 3587ec681f3Smrg /* resinfo has no writemask and always writes out 3 components: */ 3597ec681f3Smrg compile_assert(ctx, intr->num_components <= 3); 3607ec681f3Smrg resinfo->dsts[0]->wrmask = MASK(3); 3617ec681f3Smrg ir3_handle_bindless_cat6(resinfo, intr->src[0]); 3627ec681f3Smrg ir3_handle_nonuniform(resinfo, intr); 3637ec681f3Smrg 3647ec681f3Smrg ir3_split_dest(b, dst, resinfo, 0, intr->num_components); 3657ec681f3Smrg} 3667e102996Smaya 3677ec681f3Smrgstatic void 3687ec681f3Smrgemit_intrinsic_load_global_ir3(struct ir3_context *ctx, 3697ec681f3Smrg nir_intrinsic_instr *intr, 3707ec681f3Smrg struct ir3_instruction **dst) 3717e102996Smaya{ 3727ec681f3Smrg struct ir3_block *b = ctx->block; 3737ec681f3Smrg unsigned dest_components = nir_intrinsic_dest_components(intr); 3747ec681f3Smrg struct ir3_instruction *addr, *offset; 3757ec681f3Smrg 3767ec681f3Smrg addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0], 3777ec681f3Smrg ir3_get_src(ctx, &intr->src[0])[1]); 3787ec681f3Smrg 3797ec681f3Smrg offset = ir3_get_src(ctx, &intr->src[1])[0]; 3807ec681f3Smrg 3817ec681f3Smrg struct ir3_instruction *load = 3827ec681f3Smrg ir3_LDG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0, 3837ec681f3Smrg create_immed(b, 0), 0, create_immed(b, dest_components), 0); 3847ec681f3Smrg load->cat6.type = TYPE_U32; 3857ec681f3Smrg load->dsts[0]->wrmask = MASK(dest_components); 3867ec681f3Smrg 3877ec681f3Smrg load->barrier_class = IR3_BARRIER_BUFFER_R; 3887ec681f3Smrg load->barrier_conflict = IR3_BARRIER_BUFFER_W; 3897ec681f3Smrg 3907ec681f3Smrg ir3_split_dest(b, dst, load, 0, dest_components); 3917e102996Smaya} 3927e102996Smaya 3937ec681f3Smrgstatic void 3947ec681f3Smrgemit_intrinsic_store_global_ir3(struct ir3_context *ctx, 3957ec681f3Smrg nir_intrinsic_instr *intr) 3967e102996Smaya{ 3977ec681f3Smrg struct ir3_block *b = ctx->block; 3987ec681f3Smrg struct ir3_instruction *value, *addr, *offset; 3997ec681f3Smrg unsigned ncomp = nir_intrinsic_src_components(intr, 0); 4007e102996Smaya 4017ec681f3Smrg addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[1])[0], 4027ec681f3Smrg ir3_get_src(ctx, &intr->src[1])[1]); 4037ec681f3Smrg 4047ec681f3Smrg offset = ir3_get_src(ctx, &intr->src[2])[0]; 4057ec681f3Smrg 4067ec681f3Smrg value = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp); 4077ec681f3Smrg 4087ec681f3Smrg struct ir3_instruction *stg = 4097ec681f3Smrg ir3_STG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0, 4107ec681f3Smrg create_immed(b, 0), 0, value, 0, create_immed(b, ncomp), 0); 4117ec681f3Smrg stg->cat6.type = TYPE_U32; 4127ec681f3Smrg stg->cat6.iim_val = 1; 4137ec681f3Smrg 4147ec681f3Smrg array_insert(b, b->keeps, stg); 4157ec681f3Smrg 4167ec681f3Smrg stg->barrier_class = IR3_BARRIER_BUFFER_W; 4177ec681f3Smrg stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; 4187e102996Smaya} 4197ec681f3Smrg 4207ec681f3Smrgconst struct ir3_context_funcs ir3_a6xx_funcs = { 4217ec681f3Smrg .emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo, 4227ec681f3Smrg .emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo, 4237ec681f3Smrg .emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo, 4247ec681f3Smrg .emit_intrinsic_load_image = emit_intrinsic_load_image, 4257ec681f3Smrg .emit_intrinsic_store_image = emit_intrinsic_store_image, 4267ec681f3Smrg .emit_intrinsic_atomic_image = emit_intrinsic_atomic_image, 4277ec681f3Smrg .emit_intrinsic_image_size = emit_intrinsic_image_size, 4287ec681f3Smrg .emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3, 4297ec681f3Smrg .emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3, 4307ec681f3Smrg}; 431