17ec681f3Smrg/* 27ec681f3Smrg * Copyright 2014 Advanced Micro Devices, Inc. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the 67ec681f3Smrg * "Software"), to deal in the Software without restriction, including 77ec681f3Smrg * without limitation the rights to use, copy, modify, merge, publish, 87ec681f3Smrg * distribute, sub license, and/or sell copies of the Software, and to 97ec681f3Smrg * permit persons to whom the Software is furnished to do so, subject to 107ec681f3Smrg * the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 137ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 147ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 157ec681f3Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 167ec681f3Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 177ec681f3Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 187ec681f3Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 197ec681f3Smrg * 207ec681f3Smrg * The above copyright notice and this permission notice (including the 217ec681f3Smrg * next paragraph) shall be included in all copies or substantial portions 227ec681f3Smrg * of the Software. 
237ec681f3Smrg * 247ec681f3Smrg */ 257ec681f3Smrg/* based on pieces from si_pipe.c and radeon_llvm_emit.c */ 267ec681f3Smrg#include "ac_llvm_build.h" 277ec681f3Smrg 287ec681f3Smrg#include "ac_exp_param.h" 297ec681f3Smrg#include "ac_llvm_util.h" 307ec681f3Smrg#include "ac_shader_util.h" 317ec681f3Smrg#include "c11/threads.h" 327ec681f3Smrg#include "shader_enums.h" 337ec681f3Smrg#include "sid.h" 347ec681f3Smrg#include "util/bitscan.h" 357ec681f3Smrg#include "util/macros.h" 367ec681f3Smrg#include "util/u_atomic.h" 377ec681f3Smrg#include "util/u_math.h" 387ec681f3Smrg#include <llvm-c/Core.h> 397ec681f3Smrg#include <llvm/Config/llvm-config.h> 407ec681f3Smrg 417ec681f3Smrg#include <assert.h> 427ec681f3Smrg#include <stdio.h> 437ec681f3Smrg 447ec681f3Smrg#define AC_LLVM_INITIAL_CF_DEPTH 4 457ec681f3Smrg 467ec681f3Smrg/* Data for if/else/endif and bgnloop/endloop control flow structures. 477ec681f3Smrg */ 487ec681f3Smrgstruct ac_llvm_flow { 497ec681f3Smrg /* Loop exit or next part of if/else/endif. */ 507ec681f3Smrg LLVMBasicBlockRef next_block; 517ec681f3Smrg LLVMBasicBlockRef loop_entry_block; 527ec681f3Smrg}; 537ec681f3Smrg 547ec681f3Smrg/* Initialize module-independent parts of the context. 557ec681f3Smrg * 567ec681f3Smrg * The caller is responsible for initializing ctx::module and ctx::builder. 
577ec681f3Smrg */ 587ec681f3Smrgvoid ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler, 597ec681f3Smrg enum chip_class chip_class, enum radeon_family family, 607ec681f3Smrg const struct radeon_info *info, 617ec681f3Smrg enum ac_float_mode float_mode, unsigned wave_size, 627ec681f3Smrg unsigned ballot_mask_bits) 637ec681f3Smrg{ 647ec681f3Smrg ctx->context = LLVMContextCreate(); 657ec681f3Smrg 667ec681f3Smrg ctx->chip_class = chip_class; 677ec681f3Smrg ctx->family = family; 687ec681f3Smrg ctx->info = info; 697ec681f3Smrg ctx->wave_size = wave_size; 707ec681f3Smrg ctx->ballot_mask_bits = ballot_mask_bits; 717ec681f3Smrg ctx->float_mode = float_mode; 727ec681f3Smrg ctx->module = ac_create_module(compiler->tm, ctx->context); 737ec681f3Smrg ctx->builder = ac_create_builder(ctx->context, float_mode); 747ec681f3Smrg 757ec681f3Smrg ctx->voidt = LLVMVoidTypeInContext(ctx->context); 767ec681f3Smrg ctx->i1 = LLVMInt1TypeInContext(ctx->context); 777ec681f3Smrg ctx->i8 = LLVMInt8TypeInContext(ctx->context); 787ec681f3Smrg ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); 797ec681f3Smrg ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); 807ec681f3Smrg ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); 817ec681f3Smrg ctx->i128 = LLVMIntTypeInContext(ctx->context, 128); 827ec681f3Smrg ctx->intptr = ctx->i32; 837ec681f3Smrg ctx->f16 = LLVMHalfTypeInContext(ctx->context); 847ec681f3Smrg ctx->f32 = LLVMFloatTypeInContext(ctx->context); 857ec681f3Smrg ctx->f64 = LLVMDoubleTypeInContext(ctx->context); 867ec681f3Smrg ctx->v2i16 = LLVMVectorType(ctx->i16, 2); 877ec681f3Smrg ctx->v4i16 = LLVMVectorType(ctx->i16, 4); 887ec681f3Smrg ctx->v2f16 = LLVMVectorType(ctx->f16, 2); 897ec681f3Smrg ctx->v4f16 = LLVMVectorType(ctx->f16, 4); 907ec681f3Smrg ctx->v2i32 = LLVMVectorType(ctx->i32, 2); 917ec681f3Smrg ctx->v3i32 = LLVMVectorType(ctx->i32, 3); 927ec681f3Smrg ctx->v4i32 = LLVMVectorType(ctx->i32, 4); 937ec681f3Smrg ctx->v2f32 = LLVMVectorType(ctx->f32, 2); 
947ec681f3Smrg ctx->v3f32 = LLVMVectorType(ctx->f32, 3); 957ec681f3Smrg ctx->v4f32 = LLVMVectorType(ctx->f32, 4); 967ec681f3Smrg ctx->v8i32 = LLVMVectorType(ctx->i32, 8); 977ec681f3Smrg ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size); 987ec681f3Smrg ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits); 997ec681f3Smrg 1007ec681f3Smrg ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false); 1017ec681f3Smrg ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false); 1027ec681f3Smrg ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false); 1037ec681f3Smrg ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false); 1047ec681f3Smrg ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false); 1057ec681f3Smrg ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false); 1067ec681f3Smrg ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false); 1077ec681f3Smrg ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false); 1087ec681f3Smrg ctx->i128_0 = LLVMConstInt(ctx->i128, 0, false); 1097ec681f3Smrg ctx->i128_1 = LLVMConstInt(ctx->i128, 1, false); 1107ec681f3Smrg ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0); 1117ec681f3Smrg ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0); 1127ec681f3Smrg ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0); 1137ec681f3Smrg ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0); 1147ec681f3Smrg ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0); 1157ec681f3Smrg ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0); 1167ec681f3Smrg 1177ec681f3Smrg ctx->i1false = LLVMConstInt(ctx->i1, 0, false); 1187ec681f3Smrg ctx->i1true = LLVMConstInt(ctx->i1, 1, false); 1197ec681f3Smrg 1207ec681f3Smrg ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, "range", 5); 1217ec681f3Smrg 1227ec681f3Smrg ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, "invariant.load", 14); 1237ec681f3Smrg 1247ec681f3Smrg ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14); 1257ec681f3Smrg 1267ec681f3Smrg ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); 1277ec681f3Smrg ctx->flow = calloc(1, sizeof(*ctx->flow)); 
1287ec681f3Smrg} 1297ec681f3Smrg 1307ec681f3Smrgvoid ac_llvm_context_dispose(struct ac_llvm_context *ctx) 1317ec681f3Smrg{ 1327ec681f3Smrg free(ctx->flow->stack); 1337ec681f3Smrg free(ctx->flow); 1347ec681f3Smrg ctx->flow = NULL; 1357ec681f3Smrg} 1367ec681f3Smrg 1377ec681f3Smrgint ac_get_llvm_num_components(LLVMValueRef value) 1387ec681f3Smrg{ 1397ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(value); 1407ec681f3Smrg unsigned num_components = 1417ec681f3Smrg LLVMGetTypeKind(type) == LLVMVectorTypeKind ? LLVMGetVectorSize(type) : 1; 1427ec681f3Smrg return num_components; 1437ec681f3Smrg} 1447ec681f3Smrg 1457ec681f3SmrgLLVMValueRef ac_llvm_extract_elem(struct ac_llvm_context *ac, LLVMValueRef value, int index) 1467ec681f3Smrg{ 1477ec681f3Smrg if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) { 1487ec681f3Smrg assert(index == 0); 1497ec681f3Smrg return value; 1507ec681f3Smrg } 1517ec681f3Smrg 1527ec681f3Smrg return LLVMBuildExtractElement(ac->builder, value, LLVMConstInt(ac->i32, index, false), ""); 1537ec681f3Smrg} 1547ec681f3Smrg 1557ec681f3Smrgint ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type) 1567ec681f3Smrg{ 1577ec681f3Smrg if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) 1587ec681f3Smrg type = LLVMGetElementType(type); 1597ec681f3Smrg 1607ec681f3Smrg if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind) 1617ec681f3Smrg return LLVMGetIntTypeWidth(type); 1627ec681f3Smrg 1637ec681f3Smrg if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) { 1647ec681f3Smrg if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_LDS) 1657ec681f3Smrg return 32; 1667ec681f3Smrg } 1677ec681f3Smrg 1687ec681f3Smrg if (type == ctx->f16) 1697ec681f3Smrg return 16; 1707ec681f3Smrg if (type == ctx->f32) 1717ec681f3Smrg return 32; 1727ec681f3Smrg if (type == ctx->f64) 1737ec681f3Smrg return 64; 1747ec681f3Smrg 1757ec681f3Smrg unreachable("Unhandled type kind in get_elem_bits"); 1767ec681f3Smrg} 1777ec681f3Smrg 1787ec681f3Smrgunsigned ac_get_type_size(LLVMTypeRef type) 
1797ec681f3Smrg{ 1807ec681f3Smrg LLVMTypeKind kind = LLVMGetTypeKind(type); 1817ec681f3Smrg 1827ec681f3Smrg switch (kind) { 1837ec681f3Smrg case LLVMIntegerTypeKind: 1847ec681f3Smrg return LLVMGetIntTypeWidth(type) / 8; 1857ec681f3Smrg case LLVMHalfTypeKind: 1867ec681f3Smrg return 2; 1877ec681f3Smrg case LLVMFloatTypeKind: 1887ec681f3Smrg return 4; 1897ec681f3Smrg case LLVMDoubleTypeKind: 1907ec681f3Smrg return 8; 1917ec681f3Smrg case LLVMPointerTypeKind: 1927ec681f3Smrg if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT) 1937ec681f3Smrg return 4; 1947ec681f3Smrg return 8; 1957ec681f3Smrg case LLVMVectorTypeKind: 1967ec681f3Smrg return LLVMGetVectorSize(type) * ac_get_type_size(LLVMGetElementType(type)); 1977ec681f3Smrg case LLVMArrayTypeKind: 1987ec681f3Smrg return LLVMGetArrayLength(type) * ac_get_type_size(LLVMGetElementType(type)); 1997ec681f3Smrg default: 2007ec681f3Smrg assert(0); 2017ec681f3Smrg return 0; 2027ec681f3Smrg } 2037ec681f3Smrg} 2047ec681f3Smrg 2057ec681f3Smrgstatic LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) 2067ec681f3Smrg{ 2077ec681f3Smrg if (t == ctx->i1) 2087ec681f3Smrg return ctx->i1; 2097ec681f3Smrg else if (t == ctx->i8) 2107ec681f3Smrg return ctx->i8; 2117ec681f3Smrg else if (t == ctx->f16 || t == ctx->i16) 2127ec681f3Smrg return ctx->i16; 2137ec681f3Smrg else if (t == ctx->f32 || t == ctx->i32) 2147ec681f3Smrg return ctx->i32; 2157ec681f3Smrg else if (t == ctx->f64 || t == ctx->i64) 2167ec681f3Smrg return ctx->i64; 2177ec681f3Smrg else 2187ec681f3Smrg unreachable("Unhandled integer size"); 2197ec681f3Smrg} 2207ec681f3Smrg 2217ec681f3SmrgLLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t) 2227ec681f3Smrg{ 2237ec681f3Smrg if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { 2247ec681f3Smrg LLVMTypeRef elem_type = LLVMGetElementType(t); 2257ec681f3Smrg return LLVMVectorType(to_integer_type_scalar(ctx, elem_type), LLVMGetVectorSize(t)); 2267ec681f3Smrg } 2277ec681f3Smrg if 
(LLVMGetTypeKind(t) == LLVMPointerTypeKind) { 2287ec681f3Smrg switch (LLVMGetPointerAddressSpace(t)) { 2297ec681f3Smrg case AC_ADDR_SPACE_GLOBAL: 2307ec681f3Smrg return ctx->i64; 2317ec681f3Smrg case AC_ADDR_SPACE_CONST_32BIT: 2327ec681f3Smrg case AC_ADDR_SPACE_LDS: 2337ec681f3Smrg return ctx->i32; 2347ec681f3Smrg default: 2357ec681f3Smrg unreachable("unhandled address space"); 2367ec681f3Smrg } 2377ec681f3Smrg } 2387ec681f3Smrg return to_integer_type_scalar(ctx, t); 2397ec681f3Smrg} 2407ec681f3Smrg 2417ec681f3SmrgLLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v) 2427ec681f3Smrg{ 2437ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(v); 2447ec681f3Smrg if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) { 2457ec681f3Smrg return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), ""); 2467ec681f3Smrg } 2477ec681f3Smrg return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), ""); 2487ec681f3Smrg} 2497ec681f3Smrg 2507ec681f3SmrgLLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v) 2517ec681f3Smrg{ 2527ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(v); 2537ec681f3Smrg if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) 2547ec681f3Smrg return v; 2557ec681f3Smrg return ac_to_integer(ctx, v); 2567ec681f3Smrg} 2577ec681f3Smrg 2587ec681f3Smrgstatic LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) 2597ec681f3Smrg{ 2607ec681f3Smrg if (t == ctx->i8) 2617ec681f3Smrg return ctx->i8; 2627ec681f3Smrg else if (t == ctx->i16 || t == ctx->f16) 2637ec681f3Smrg return ctx->f16; 2647ec681f3Smrg else if (t == ctx->i32 || t == ctx->f32) 2657ec681f3Smrg return ctx->f32; 2667ec681f3Smrg else if (t == ctx->i64 || t == ctx->f64) 2677ec681f3Smrg return ctx->f64; 2687ec681f3Smrg else 2697ec681f3Smrg unreachable("Unhandled float size"); 2707ec681f3Smrg} 2717ec681f3Smrg 2727ec681f3SmrgLLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t) 2737ec681f3Smrg{ 2747ec681f3Smrg if 
(LLVMGetTypeKind(t) == LLVMVectorTypeKind) { 2757ec681f3Smrg LLVMTypeRef elem_type = LLVMGetElementType(t); 2767ec681f3Smrg return LLVMVectorType(to_float_type_scalar(ctx, elem_type), LLVMGetVectorSize(t)); 2777ec681f3Smrg } 2787ec681f3Smrg return to_float_type_scalar(ctx, t); 2797ec681f3Smrg} 2807ec681f3Smrg 2817ec681f3SmrgLLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v) 2827ec681f3Smrg{ 2837ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(v); 2847ec681f3Smrg return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), ""); 2857ec681f3Smrg} 2867ec681f3Smrg 2877ec681f3SmrgLLVMValueRef ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name, 2887ec681f3Smrg LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count, 2897ec681f3Smrg unsigned attrib_mask) 2907ec681f3Smrg{ 2917ec681f3Smrg LLVMValueRef function, call; 2927ec681f3Smrg bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY); 2937ec681f3Smrg 2947ec681f3Smrg function = LLVMGetNamedFunction(ctx->module, name); 2957ec681f3Smrg if (!function) { 2967ec681f3Smrg LLVMTypeRef param_types[32], function_type; 2977ec681f3Smrg unsigned i; 2987ec681f3Smrg 2997ec681f3Smrg assert(param_count <= 32); 3007ec681f3Smrg 3017ec681f3Smrg for (i = 0; i < param_count; ++i) { 3027ec681f3Smrg assert(params[i]); 3037ec681f3Smrg param_types[i] = LLVMTypeOf(params[i]); 3047ec681f3Smrg } 3057ec681f3Smrg function_type = LLVMFunctionType(return_type, param_types, param_count, 0); 3067ec681f3Smrg function = LLVMAddFunction(ctx->module, name, function_type); 3077ec681f3Smrg 3087ec681f3Smrg LLVMSetFunctionCallConv(function, LLVMCCallConv); 3097ec681f3Smrg LLVMSetLinkage(function, LLVMExternalLinkage); 3107ec681f3Smrg 3117ec681f3Smrg if (!set_callsite_attrs) 3127ec681f3Smrg ac_add_func_attributes(ctx->context, function, attrib_mask); 3137ec681f3Smrg } 3147ec681f3Smrg 3157ec681f3Smrg call = LLVMBuildCall(ctx->builder, function, params, param_count, ""); 3167ec681f3Smrg if (set_callsite_attrs) 
3177ec681f3Smrg ac_add_func_attributes(ctx->context, call, attrib_mask); 3187ec681f3Smrg return call; 3197ec681f3Smrg} 3207ec681f3Smrg 3217ec681f3Smrg/** 3227ec681f3Smrg * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with 3237ec681f3Smrg * intrinsic names). 3247ec681f3Smrg */ 3257ec681f3Smrgvoid ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize) 3267ec681f3Smrg{ 3277ec681f3Smrg LLVMTypeRef elem_type = type; 3287ec681f3Smrg 3297ec681f3Smrg if (LLVMGetTypeKind(type) == LLVMStructTypeKind) { 3307ec681f3Smrg unsigned count = LLVMCountStructElementTypes(type); 3317ec681f3Smrg int ret = snprintf(buf, bufsize, "sl_"); 3327ec681f3Smrg buf += ret; 3337ec681f3Smrg bufsize -= ret; 3347ec681f3Smrg 3357ec681f3Smrg LLVMTypeRef *elems = alloca(count * sizeof(LLVMTypeRef)); 3367ec681f3Smrg LLVMGetStructElementTypes(type, elems); 3377ec681f3Smrg 3387ec681f3Smrg for (unsigned i = 0; i < count; i++) { 3397ec681f3Smrg ac_build_type_name_for_intr(elems[i], buf, bufsize); 3407ec681f3Smrg ret = strlen(buf); 3417ec681f3Smrg buf += ret; 3427ec681f3Smrg bufsize -= ret; 3437ec681f3Smrg } 3447ec681f3Smrg 3457ec681f3Smrg snprintf(buf, bufsize, "s"); 3467ec681f3Smrg return; 3477ec681f3Smrg } 3487ec681f3Smrg 3497ec681f3Smrg assert(bufsize >= 8); 3507ec681f3Smrg if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { 3517ec681f3Smrg int ret = snprintf(buf, bufsize, "v%u", LLVMGetVectorSize(type)); 3527ec681f3Smrg if (ret < 0) { 3537ec681f3Smrg char *type_name = LLVMPrintTypeToString(type); 3547ec681f3Smrg fprintf(stderr, "Error building type name for: %s\n", type_name); 3557ec681f3Smrg LLVMDisposeMessage(type_name); 3567ec681f3Smrg return; 3577ec681f3Smrg } 3587ec681f3Smrg elem_type = LLVMGetElementType(type); 3597ec681f3Smrg buf += ret; 3607ec681f3Smrg bufsize -= ret; 3617ec681f3Smrg } 3627ec681f3Smrg switch (LLVMGetTypeKind(elem_type)) { 3637ec681f3Smrg default: 3647ec681f3Smrg break; 3657ec681f3Smrg case LLVMIntegerTypeKind: 3667ec681f3Smrg 
snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type)); 3677ec681f3Smrg break; 3687ec681f3Smrg case LLVMHalfTypeKind: 3697ec681f3Smrg snprintf(buf, bufsize, "f16"); 3707ec681f3Smrg break; 3717ec681f3Smrg case LLVMFloatTypeKind: 3727ec681f3Smrg snprintf(buf, bufsize, "f32"); 3737ec681f3Smrg break; 3747ec681f3Smrg case LLVMDoubleTypeKind: 3757ec681f3Smrg snprintf(buf, bufsize, "f64"); 3767ec681f3Smrg break; 3777ec681f3Smrg } 3787ec681f3Smrg} 3797ec681f3Smrg 3807ec681f3Smrg/** 3817ec681f3Smrg * Helper function that builds an LLVM IR PHI node and immediately adds 3827ec681f3Smrg * incoming edges. 3837ec681f3Smrg */ 3847ec681f3SmrgLLVMValueRef ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, unsigned count_incoming, 3857ec681f3Smrg LLVMValueRef *values, LLVMBasicBlockRef *blocks) 3867ec681f3Smrg{ 3877ec681f3Smrg LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, ""); 3887ec681f3Smrg LLVMAddIncoming(phi, values, blocks, count_incoming); 3897ec681f3Smrg return phi; 3907ec681f3Smrg} 3917ec681f3Smrg 3927ec681f3Smrgvoid ac_build_s_barrier(struct ac_llvm_context *ctx) 3937ec681f3Smrg{ 3947ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); 3957ec681f3Smrg} 3967ec681f3Smrg 3977ec681f3Smrg/* Prevent optimizations (at least of memory accesses) across the current 3987ec681f3Smrg * point in the program by emitting empty inline assembly that is marked as 3997ec681f3Smrg * having side effects. 4007ec681f3Smrg * 4017ec681f3Smrg * Optionally, a value can be passed through the inline assembly to prevent 4027ec681f3Smrg * LLVM from hoisting calls to ReadNone functions. 4037ec681f3Smrg */ 4047ec681f3Smrgvoid ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pgpr, bool sgpr) 4057ec681f3Smrg{ 4067ec681f3Smrg static int counter = 0; 4077ec681f3Smrg 4087ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 4097ec681f3Smrg char code[16]; 4107ec681f3Smrg const char *constraint = sgpr ? 
"=s,0" : "=v,0"; 4117ec681f3Smrg 4127ec681f3Smrg snprintf(code, sizeof(code), "; %d", (int)p_atomic_inc_return(&counter)); 4137ec681f3Smrg 4147ec681f3Smrg if (!pgpr) { 4157ec681f3Smrg LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); 4167ec681f3Smrg LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false); 4177ec681f3Smrg LLVMBuildCall(builder, inlineasm, NULL, 0, ""); 4187ec681f3Smrg } else if (LLVMTypeOf(*pgpr) == ctx->i32) { 4197ec681f3Smrg /* Simple version for i32 that allows the caller to set LLVM metadata on the call 4207ec681f3Smrg * instruction. */ 4217ec681f3Smrg LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false); 4227ec681f3Smrg LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false); 4237ec681f3Smrg 4247ec681f3Smrg *pgpr = LLVMBuildCall(builder, inlineasm, pgpr, 1, ""); 4257ec681f3Smrg } else if (LLVMTypeOf(*pgpr) == ctx->i16) { 4267ec681f3Smrg /* Simple version for i16 that allows the caller to set LLVM metadata on the call 4277ec681f3Smrg * instruction. 
*/ 4287ec681f3Smrg LLVMTypeRef ftype = LLVMFunctionType(ctx->i16, &ctx->i16, 1, false); 4297ec681f3Smrg LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false); 4307ec681f3Smrg 4317ec681f3Smrg *pgpr = LLVMBuildCall(builder, inlineasm, pgpr, 1, ""); 4327ec681f3Smrg } else if (LLVMGetTypeKind(LLVMTypeOf(*pgpr)) == LLVMPointerTypeKind) { 4337ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(*pgpr); 4347ec681f3Smrg LLVMTypeRef ftype = LLVMFunctionType(type, &type, 1, false); 4357ec681f3Smrg LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false); 4367ec681f3Smrg 4377ec681f3Smrg *pgpr = LLVMBuildCall(builder, inlineasm, pgpr, 1, ""); 4387ec681f3Smrg } else { 4397ec681f3Smrg LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false); 4407ec681f3Smrg LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false); 4417ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(*pgpr); 4427ec681f3Smrg unsigned bitsize = ac_get_elem_bits(ctx, type); 4437ec681f3Smrg LLVMValueRef vgpr = *pgpr; 4447ec681f3Smrg LLVMTypeRef vgpr_type; 4457ec681f3Smrg unsigned vgpr_size; 4467ec681f3Smrg LLVMValueRef vgpr0; 4477ec681f3Smrg 4487ec681f3Smrg if (bitsize < 32) 4497ec681f3Smrg vgpr = LLVMBuildZExt(ctx->builder, vgpr, ctx->i32, ""); 4507ec681f3Smrg 4517ec681f3Smrg vgpr_type = LLVMTypeOf(vgpr); 4527ec681f3Smrg vgpr_size = ac_get_type_size(vgpr_type); 4537ec681f3Smrg 4547ec681f3Smrg assert(vgpr_size % 4 == 0); 4557ec681f3Smrg 4567ec681f3Smrg vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), ""); 4577ec681f3Smrg vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, ""); 4587ec681f3Smrg vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, ""); 4597ec681f3Smrg vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, ""); 4607ec681f3Smrg vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, ""); 4617ec681f3Smrg 4627ec681f3Smrg if (bitsize < 32) 4637ec681f3Smrg vgpr = LLVMBuildTrunc(builder, vgpr, type, 
""); 4647ec681f3Smrg 4657ec681f3Smrg *pgpr = vgpr; 4667ec681f3Smrg } 4677ec681f3Smrg} 4687ec681f3Smrg 4697ec681f3SmrgLLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope) 4707ec681f3Smrg{ 4717ec681f3Smrg const char *subgroup = "llvm.readcyclecounter"; 4727ec681f3Smrg const char *name = scope == NIR_SCOPE_DEVICE ? "llvm.amdgcn.s.memrealtime" : subgroup; 4737ec681f3Smrg 4747ec681f3Smrg LLVMValueRef tmp = ac_build_intrinsic(ctx, name, ctx->i64, NULL, 0, 0); 4757ec681f3Smrg return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, ""); 4767ec681f3Smrg} 4777ec681f3Smrg 4787ec681f3SmrgLLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value) 4797ec681f3Smrg{ 4807ec681f3Smrg const char *name; 4817ec681f3Smrg 4827ec681f3Smrg if (LLVMTypeOf(value) == ctx->i1) 4837ec681f3Smrg value = LLVMBuildZExt(ctx->builder, value, ctx->i32, ""); 4847ec681f3Smrg 4857ec681f3Smrg if (ctx->wave_size == 64) 4867ec681f3Smrg name = "llvm.amdgcn.icmp.i64.i32"; 4877ec681f3Smrg else 4887ec681f3Smrg name = "llvm.amdgcn.icmp.i32.i32"; 4897ec681f3Smrg 4907ec681f3Smrg LLVMValueRef args[3] = {value, ctx->i32_0, LLVMConstInt(ctx->i32, LLVMIntNE, 0)}; 4917ec681f3Smrg 4927ec681f3Smrg /* We currently have no other way to prevent LLVM from lifting the icmp 4937ec681f3Smrg * calls to a dominating basic block. 
4947ec681f3Smrg */ 4957ec681f3Smrg ac_build_optimization_barrier(ctx, &args[0], false); 4967ec681f3Smrg 4977ec681f3Smrg args[0] = ac_to_integer(ctx, args[0]); 4987ec681f3Smrg 4997ec681f3Smrg return ac_build_intrinsic( 5007ec681f3Smrg ctx, name, ctx->iN_wavemask, args, 3, 5017ec681f3Smrg AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); 5027ec681f3Smrg} 5037ec681f3Smrg 5047ec681f3SmrgLLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, LLVMValueRef value) 5057ec681f3Smrg{ 5067ec681f3Smrg const char *name; 5077ec681f3Smrg 5087ec681f3Smrg if (ctx->wave_size == 64) 5097ec681f3Smrg name = "llvm.amdgcn.icmp.i64.i1"; 5107ec681f3Smrg else 5117ec681f3Smrg name = "llvm.amdgcn.icmp.i32.i1"; 5127ec681f3Smrg 5137ec681f3Smrg LLVMValueRef args[3] = { 5147ec681f3Smrg value, 5157ec681f3Smrg ctx->i1false, 5167ec681f3Smrg LLVMConstInt(ctx->i32, LLVMIntNE, 0), 5177ec681f3Smrg }; 5187ec681f3Smrg 5197ec681f3Smrg return ac_build_intrinsic( 5207ec681f3Smrg ctx, name, ctx->iN_wavemask, args, 3, 5217ec681f3Smrg AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); 5227ec681f3Smrg} 5237ec681f3Smrg 5247ec681f3SmrgLLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value) 5257ec681f3Smrg{ 5267ec681f3Smrg LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1); 5277ec681f3Smrg LLVMValueRef vote_set = ac_build_ballot(ctx, value); 5287ec681f3Smrg return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, ""); 5297ec681f3Smrg} 5307ec681f3Smrg 5317ec681f3SmrgLLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value) 5327ec681f3Smrg{ 5337ec681f3Smrg LLVMValueRef vote_set = ac_build_ballot(ctx, value); 5347ec681f3Smrg return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, LLVMConstInt(ctx->iN_wavemask, 0, 0), 5357ec681f3Smrg ""); 5367ec681f3Smrg} 5377ec681f3Smrg 5387ec681f3SmrgLLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value) 5397ec681f3Smrg{ 5407ec681f3Smrg 
LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1); 5417ec681f3Smrg LLVMValueRef vote_set = ac_build_ballot(ctx, value); 5427ec681f3Smrg 5437ec681f3Smrg LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, ""); 5447ec681f3Smrg LLVMValueRef none = 5457ec681f3Smrg LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, LLVMConstInt(ctx->iN_wavemask, 0, 0), ""); 5467ec681f3Smrg return LLVMBuildOr(ctx->builder, all, none, ""); 5477ec681f3Smrg} 5487ec681f3Smrg 5497ec681f3SmrgLLVMValueRef ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, 5507ec681f3Smrg unsigned value_count, unsigned component) 5517ec681f3Smrg{ 5527ec681f3Smrg LLVMValueRef vec = NULL; 5537ec681f3Smrg 5547ec681f3Smrg if (value_count == 1) { 5557ec681f3Smrg return values[component]; 5567ec681f3Smrg } else if (!value_count) 5577ec681f3Smrg unreachable("value_count is 0"); 5587ec681f3Smrg 5597ec681f3Smrg for (unsigned i = component; i < value_count + component; i++) { 5607ec681f3Smrg LLVMValueRef value = values[i]; 5617ec681f3Smrg 5627ec681f3Smrg if (i == component) 5637ec681f3Smrg vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count)); 5647ec681f3Smrg LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false); 5657ec681f3Smrg vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, ""); 5667ec681f3Smrg } 5677ec681f3Smrg return vec; 5687ec681f3Smrg} 5697ec681f3Smrg 5707ec681f3SmrgLLVMValueRef ac_build_gather_values_extended(struct ac_llvm_context *ctx, LLVMValueRef *values, 5717ec681f3Smrg unsigned value_count, unsigned value_stride, bool load, 5727ec681f3Smrg bool always_vector) 5737ec681f3Smrg{ 5747ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 5757ec681f3Smrg LLVMValueRef vec = NULL; 5767ec681f3Smrg unsigned i; 5777ec681f3Smrg 5787ec681f3Smrg if (value_count == 1 && !always_vector) { 5797ec681f3Smrg if (load) 5807ec681f3Smrg return LLVMBuildLoad(builder, values[0], ""); 5817ec681f3Smrg return values[0]; 5827ec681f3Smrg 
} else if (!value_count) 5837ec681f3Smrg unreachable("value_count is 0"); 5847ec681f3Smrg 5857ec681f3Smrg for (i = 0; i < value_count; i++) { 5867ec681f3Smrg LLVMValueRef value = values[i * value_stride]; 5877ec681f3Smrg if (load) 5887ec681f3Smrg value = LLVMBuildLoad(builder, value, ""); 5897ec681f3Smrg 5907ec681f3Smrg if (!i) 5917ec681f3Smrg vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count)); 5927ec681f3Smrg LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); 5937ec681f3Smrg vec = LLVMBuildInsertElement(builder, vec, value, index, ""); 5947ec681f3Smrg } 5957ec681f3Smrg return vec; 5967ec681f3Smrg} 5977ec681f3Smrg 5987ec681f3SmrgLLVMValueRef ac_build_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, 5997ec681f3Smrg unsigned value_count) 6007ec681f3Smrg{ 6017ec681f3Smrg return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false); 6027ec681f3Smrg} 6037ec681f3Smrg 6047ec681f3SmrgLLVMValueRef ac_build_concat(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) 6057ec681f3Smrg{ 6067ec681f3Smrg unsigned a_size = ac_get_llvm_num_components(a); 6077ec681f3Smrg unsigned b_size = ac_get_llvm_num_components(b); 6087ec681f3Smrg 6097ec681f3Smrg LLVMValueRef *elems = alloca((a_size + b_size) * sizeof(LLVMValueRef)); 6107ec681f3Smrg for (unsigned i = 0; i < a_size; i++) 6117ec681f3Smrg elems[i] = ac_llvm_extract_elem(ctx, a, i); 6127ec681f3Smrg for (unsigned i = 0; i < b_size; i++) 6137ec681f3Smrg elems[a_size + i] = ac_llvm_extract_elem(ctx, b, i); 6147ec681f3Smrg 6157ec681f3Smrg return ac_build_gather_values(ctx, elems, a_size + b_size); 6167ec681f3Smrg} 6177ec681f3Smrg 6187ec681f3Smrg/* Expand a scalar or vector to <dst_channels x type> by filling the remaining 6197ec681f3Smrg * channels with undef. Extract at most src_channels components from the input. 
6207ec681f3Smrg */ 6217ec681f3SmrgLLVMValueRef ac_build_expand(struct ac_llvm_context *ctx, LLVMValueRef value, 6227ec681f3Smrg unsigned src_channels, unsigned dst_channels) 6237ec681f3Smrg{ 6247ec681f3Smrg LLVMTypeRef elemtype; 6257ec681f3Smrg LLVMValueRef *const chan = alloca(dst_channels * sizeof(LLVMValueRef)); 6267ec681f3Smrg 6277ec681f3Smrg if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) { 6287ec681f3Smrg unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value)); 6297ec681f3Smrg 6307ec681f3Smrg if (src_channels == dst_channels && vec_size == dst_channels) 6317ec681f3Smrg return value; 6327ec681f3Smrg 6337ec681f3Smrg src_channels = MIN2(src_channels, vec_size); 6347ec681f3Smrg 6357ec681f3Smrg for (unsigned i = 0; i < src_channels; i++) 6367ec681f3Smrg chan[i] = ac_llvm_extract_elem(ctx, value, i); 6377ec681f3Smrg 6387ec681f3Smrg elemtype = LLVMGetElementType(LLVMTypeOf(value)); 6397ec681f3Smrg } else { 6407ec681f3Smrg if (src_channels) { 6417ec681f3Smrg assert(src_channels == 1); 6427ec681f3Smrg chan[0] = value; 6437ec681f3Smrg } 6447ec681f3Smrg elemtype = LLVMTypeOf(value); 6457ec681f3Smrg } 6467ec681f3Smrg 6477ec681f3Smrg for (unsigned i = src_channels; i < dst_channels; i++) 6487ec681f3Smrg chan[i] = LLVMGetUndef(elemtype); 6497ec681f3Smrg 6507ec681f3Smrg return ac_build_gather_values(ctx, chan, dst_channels); 6517ec681f3Smrg} 6527ec681f3Smrg 6537ec681f3Smrg/* Extract components [start, start + channels) from a vector. 
6547ec681f3Smrg */ 6557ec681f3SmrgLLVMValueRef ac_extract_components(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned start, 6567ec681f3Smrg unsigned channels) 6577ec681f3Smrg{ 6587ec681f3Smrg LLVMValueRef *const chan = alloca(channels * sizeof(LLVMValueRef)); 6597ec681f3Smrg 6607ec681f3Smrg for (unsigned i = 0; i < channels; i++) 6617ec681f3Smrg chan[i] = ac_llvm_extract_elem(ctx, value, i + start); 6627ec681f3Smrg 6637ec681f3Smrg return ac_build_gather_values(ctx, chan, channels); 6647ec681f3Smrg} 6657ec681f3Smrg 6667ec681f3Smrg/* Expand a scalar or vector to <4 x type> by filling the remaining channels 6677ec681f3Smrg * with undef. Extract at most num_channels components from the input. 6687ec681f3Smrg */ 6697ec681f3SmrgLLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, LLVMValueRef value, 6707ec681f3Smrg unsigned num_channels) 6717ec681f3Smrg{ 6727ec681f3Smrg return ac_build_expand(ctx, value, num_channels, 4); 6737ec681f3Smrg} 6747ec681f3Smrg 6757ec681f3SmrgLLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value) 6767ec681f3Smrg{ 6777ec681f3Smrg unsigned type_size = ac_get_type_size(LLVMTypeOf(value)); 6787ec681f3Smrg const char *name; 6797ec681f3Smrg 6807ec681f3Smrg if (type_size == 2) 6817ec681f3Smrg name = "llvm.rint.f16"; 6827ec681f3Smrg else if (type_size == 4) 6837ec681f3Smrg name = "llvm.rint.f32"; 6847ec681f3Smrg else 6857ec681f3Smrg name = "llvm.rint.f64"; 6867ec681f3Smrg 6877ec681f3Smrg return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1, AC_FUNC_ATTR_READNONE); 6887ec681f3Smrg} 6897ec681f3Smrg 6907ec681f3SmrgLLVMValueRef ac_build_fdiv(struct ac_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den) 6917ec681f3Smrg{ 6927ec681f3Smrg unsigned type_size = ac_get_type_size(LLVMTypeOf(den)); 6937ec681f3Smrg const char *name; 6947ec681f3Smrg 6957ec681f3Smrg /* For doubles, we need precise division to pass GLCTS. 
*/ 6967ec681f3Smrg if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && type_size == 8) 6977ec681f3Smrg return LLVMBuildFDiv(ctx->builder, num, den, ""); 6987ec681f3Smrg 6997ec681f3Smrg if (type_size == 2) 7007ec681f3Smrg name = "llvm.amdgcn.rcp.f16"; 7017ec681f3Smrg else if (type_size == 4) 7027ec681f3Smrg name = "llvm.amdgcn.rcp.f32"; 7037ec681f3Smrg else 7047ec681f3Smrg name = "llvm.amdgcn.rcp.f64"; 7057ec681f3Smrg 7067ec681f3Smrg LLVMValueRef rcp = 7077ec681f3Smrg ac_build_intrinsic(ctx, name, LLVMTypeOf(den), &den, 1, AC_FUNC_ATTR_READNONE); 7087ec681f3Smrg 7097ec681f3Smrg return LLVMBuildFMul(ctx->builder, num, rcp, ""); 7107ec681f3Smrg} 7117ec681f3Smrg 7127ec681f3Smrg/* See fast_idiv_by_const.h. */ 7137ec681f3Smrg/* Set: increment = util_fast_udiv_info::increment ? multiplier : 0; */ 7147ec681f3SmrgLLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, LLVMValueRef num, 7157ec681f3Smrg LLVMValueRef multiplier, LLVMValueRef pre_shift, 7167ec681f3Smrg LLVMValueRef post_shift, LLVMValueRef increment) 7177ec681f3Smrg{ 7187ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 7197ec681f3Smrg 7207ec681f3Smrg num = LLVMBuildLShr(builder, num, pre_shift, ""); 7217ec681f3Smrg num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""), 7227ec681f3Smrg LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); 7237ec681f3Smrg num = LLVMBuildAdd(builder, num, LLVMBuildZExt(builder, increment, ctx->i64, ""), ""); 7247ec681f3Smrg num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); 7257ec681f3Smrg num = LLVMBuildTrunc(builder, num, ctx->i32, ""); 7267ec681f3Smrg return LLVMBuildLShr(builder, num, post_shift, ""); 7277ec681f3Smrg} 7287ec681f3Smrg 7297ec681f3Smrg/* See fast_idiv_by_const.h. */ 7307ec681f3Smrg/* If num != UINT_MAX, this more efficient version can be used. 
*/ 7317ec681f3Smrg/* Set: increment = util_fast_udiv_info::increment; */ 7327ec681f3SmrgLLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, LLVMValueRef num, 7337ec681f3Smrg LLVMValueRef multiplier, LLVMValueRef pre_shift, 7347ec681f3Smrg LLVMValueRef post_shift, LLVMValueRef increment) 7357ec681f3Smrg{ 7367ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 7377ec681f3Smrg 7387ec681f3Smrg num = LLVMBuildLShr(builder, num, pre_shift, ""); 7397ec681f3Smrg num = LLVMBuildNUWAdd(builder, num, increment, ""); 7407ec681f3Smrg num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""), 7417ec681f3Smrg LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); 7427ec681f3Smrg num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); 7437ec681f3Smrg num = LLVMBuildTrunc(builder, num, ctx->i32, ""); 7447ec681f3Smrg return LLVMBuildLShr(builder, num, post_shift, ""); 7457ec681f3Smrg} 7467ec681f3Smrg 7477ec681f3Smrg/* See fast_idiv_by_const.h. */ 7487ec681f3Smrg/* Both operands must fit in 31 bits and the divisor must not be 1. */ 7497ec681f3SmrgLLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMValueRef num, 7507ec681f3Smrg LLVMValueRef multiplier, LLVMValueRef post_shift) 7517ec681f3Smrg{ 7527ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 7537ec681f3Smrg 7547ec681f3Smrg num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""), 7557ec681f3Smrg LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); 7567ec681f3Smrg num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); 7577ec681f3Smrg num = LLVMBuildTrunc(builder, num, ctx->i32, ""); 7587ec681f3Smrg return LLVMBuildLShr(builder, num, post_shift, ""); 7597ec681f3Smrg} 7607ec681f3Smrg 7617ec681f3Smrg/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 7627ec681f3Smrg * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is 7637ec681f3Smrg * already multiplied by two. id is the cube face number. 
7647ec681f3Smrg */ 7657ec681f3Smrgstruct cube_selection_coords { 7667ec681f3Smrg LLVMValueRef stc[2]; 7677ec681f3Smrg LLVMValueRef ma; 7687ec681f3Smrg LLVMValueRef id; 7697ec681f3Smrg}; 7707ec681f3Smrg 7717ec681f3Smrgstatic void build_cube_intrinsic(struct ac_llvm_context *ctx, LLVMValueRef in[3], 7727ec681f3Smrg struct cube_selection_coords *out) 7737ec681f3Smrg{ 7747ec681f3Smrg LLVMTypeRef f32 = ctx->f32; 7757ec681f3Smrg 7767ec681f3Smrg out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", f32, in, 3, AC_FUNC_ATTR_READNONE); 7777ec681f3Smrg out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", f32, in, 3, AC_FUNC_ATTR_READNONE); 7787ec681f3Smrg out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", f32, in, 3, AC_FUNC_ATTR_READNONE); 7797ec681f3Smrg out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", f32, in, 3, AC_FUNC_ATTR_READNONE); 7807ec681f3Smrg} 7817ec681f3Smrg 7827ec681f3Smrg/** 7837ec681f3Smrg * Build a manual selection sequence for cube face sc/tc coordinates and 7847ec681f3Smrg * major axis vector (multiplied by 2 for consistency) for the given 7857ec681f3Smrg * vec3 \p coords, for the face implied by \p selcoords. 7867ec681f3Smrg * 7877ec681f3Smrg * For the major axis, we always adjust the sign to be in the direction of 7887ec681f3Smrg * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards 7897ec681f3Smrg * the selcoords major axis. 
7907ec681f3Smrg */ 7917ec681f3Smrgstatic void build_cube_select(struct ac_llvm_context *ctx, 7927ec681f3Smrg const struct cube_selection_coords *selcoords, 7937ec681f3Smrg const LLVMValueRef *coords, LLVMValueRef *out_st, 7947ec681f3Smrg LLVMValueRef *out_ma) 7957ec681f3Smrg{ 7967ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 7977ec681f3Smrg LLVMTypeRef f32 = LLVMTypeOf(coords[0]); 7987ec681f3Smrg LLVMValueRef is_ma_positive; 7997ec681f3Smrg LLVMValueRef sgn_ma; 8007ec681f3Smrg LLVMValueRef is_ma_z, is_not_ma_z; 8017ec681f3Smrg LLVMValueRef is_ma_y; 8027ec681f3Smrg LLVMValueRef is_ma_x; 8037ec681f3Smrg LLVMValueRef sgn; 8047ec681f3Smrg LLVMValueRef tmp; 8057ec681f3Smrg 8067ec681f3Smrg is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->ma, LLVMConstReal(f32, 0.0), ""); 8077ec681f3Smrg sgn_ma = LLVMBuildSelect(builder, is_ma_positive, LLVMConstReal(f32, 1.0), 8087ec681f3Smrg LLVMConstReal(f32, -1.0), ""); 8097ec681f3Smrg 8107ec681f3Smrg is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), ""); 8117ec681f3Smrg is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); 8127ec681f3Smrg is_ma_y = LLVMBuildAnd( 8137ec681f3Smrg builder, is_not_ma_z, 8147ec681f3Smrg LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), ""); 8157ec681f3Smrg is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), ""); 8167ec681f3Smrg 8177ec681f3Smrg /* Select sc */ 8187ec681f3Smrg tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], ""); 8197ec681f3Smrg sgn = LLVMBuildSelect( 8207ec681f3Smrg builder, is_ma_y, LLVMConstReal(f32, 1.0), 8217ec681f3Smrg LLVMBuildSelect(builder, is_ma_z, sgn_ma, LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); 8227ec681f3Smrg out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); 8237ec681f3Smrg 8247ec681f3Smrg /* Select tc */ 8257ec681f3Smrg tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); 8267ec681f3Smrg sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma, 
LLVMConstReal(f32, -1.0), ""); 8277ec681f3Smrg out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); 8287ec681f3Smrg 8297ec681f3Smrg /* Select ma */ 8307ec681f3Smrg tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], 8317ec681f3Smrg LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), ""); 8327ec681f3Smrg tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE); 8337ec681f3Smrg *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), ""); 8347ec681f3Smrg} 8357ec681f3Smrg 8367ec681f3Smrgvoid ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod, 8377ec681f3Smrg LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg) 8387ec681f3Smrg{ 8397ec681f3Smrg 8407ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 8417ec681f3Smrg struct cube_selection_coords selcoords; 8427ec681f3Smrg LLVMValueRef coords[3]; 8437ec681f3Smrg LLVMValueRef invma; 8447ec681f3Smrg 8457ec681f3Smrg if (is_array && !is_lod) { 8467ec681f3Smrg LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]); 8477ec681f3Smrg 8487ec681f3Smrg /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says: 8497ec681f3Smrg * 8507ec681f3Smrg * "For Array forms, the array layer used will be 8517ec681f3Smrg * 8527ec681f3Smrg * max(0, min(d−1, floor(layer+0.5))) 8537ec681f3Smrg * 8547ec681f3Smrg * where d is the depth of the texture array and layer 8557ec681f3Smrg * comes from the component indicated in the tables below. 8567ec681f3Smrg * Workaroudn for an issue where the layer is taken from a 8577ec681f3Smrg * helper invocation which happens to fall on a different 8587ec681f3Smrg * layer due to extrapolation." 8597ec681f3Smrg * 8607ec681f3Smrg * GFX8 and earlier attempt to implement this in hardware by 8617ec681f3Smrg * clamping the value of coords[2] = (8 * layer) + face. 8627ec681f3Smrg * Unfortunately, this means that the we end up with the wrong 8637ec681f3Smrg * face when clamping occurs. 
8647ec681f3Smrg * 8657ec681f3Smrg * Clamp the layer earlier to work around the issue. 8667ec681f3Smrg */ 8677ec681f3Smrg if (ctx->chip_class <= GFX8) { 8687ec681f3Smrg LLVMValueRef ge0; 8697ec681f3Smrg ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, ""); 8707ec681f3Smrg tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, ""); 8717ec681f3Smrg } 8727ec681f3Smrg 8737ec681f3Smrg coords_arg[3] = tmp; 8747ec681f3Smrg } 8757ec681f3Smrg 8767ec681f3Smrg build_cube_intrinsic(ctx, coords_arg, &selcoords); 8777ec681f3Smrg 8787ec681f3Smrg invma = 8797ec681f3Smrg ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); 8807ec681f3Smrg invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); 8817ec681f3Smrg 8827ec681f3Smrg for (int i = 0; i < 2; ++i) 8837ec681f3Smrg coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, ""); 8847ec681f3Smrg 8857ec681f3Smrg coords[2] = selcoords.id; 8867ec681f3Smrg 8877ec681f3Smrg if (is_deriv && derivs_arg) { 8887ec681f3Smrg LLVMValueRef derivs[4]; 8897ec681f3Smrg int axis; 8907ec681f3Smrg 8917ec681f3Smrg /* Convert cube derivatives to 2D derivatives. */ 8927ec681f3Smrg for (axis = 0; axis < 2; axis++) { 8937ec681f3Smrg LLVMValueRef deriv_st[2]; 8947ec681f3Smrg LLVMValueRef deriv_ma; 8957ec681f3Smrg 8967ec681f3Smrg /* Transform the derivative alongside the texture 8977ec681f3Smrg * coordinate. Mathematically, the correct formula is 8987ec681f3Smrg * as follows. Assume we're projecting onto the +Z face 8997ec681f3Smrg * and denote by dx/dh the derivative of the (original) 9007ec681f3Smrg * X texture coordinate with respect to horizontal 9017ec681f3Smrg * window coordinates. The projection onto the +Z face 9027ec681f3Smrg * plane is: 9037ec681f3Smrg * 9047ec681f3Smrg * f(x,z) = x/z 9057ec681f3Smrg * 9067ec681f3Smrg * Then df/dh = df/dx * dx/dh + df/dz * dz/dh 9077ec681f3Smrg * = 1/z * dx/dh - x/z * 1/z * dz/dh. 9087ec681f3Smrg * 9097ec681f3Smrg * This motivatives the implementation below. 
9107ec681f3Smrg * 9117ec681f3Smrg * Whether this actually gives the expected results for 9127ec681f3Smrg * apps that might feed in derivatives obtained via 9137ec681f3Smrg * finite differences is anyone's guess. The OpenGL spec 9147ec681f3Smrg * seems awfully quiet about how textureGrad for cube 9157ec681f3Smrg * maps should be handled. 9167ec681f3Smrg */ 9177ec681f3Smrg build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3], deriv_st, &deriv_ma); 9187ec681f3Smrg 9197ec681f3Smrg deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, ""); 9207ec681f3Smrg 9217ec681f3Smrg for (int i = 0; i < 2; ++i) 9227ec681f3Smrg derivs[axis * 2 + i] = 9237ec681f3Smrg LLVMBuildFSub(builder, LLVMBuildFMul(builder, deriv_st[i], invma, ""), 9247ec681f3Smrg LLVMBuildFMul(builder, deriv_ma, coords[i], ""), ""); 9257ec681f3Smrg } 9267ec681f3Smrg 9277ec681f3Smrg memcpy(derivs_arg, derivs, sizeof(derivs)); 9287ec681f3Smrg } 9297ec681f3Smrg 9307ec681f3Smrg /* Shift the texture coordinate. This must be applied after the 9317ec681f3Smrg * derivative calculation. 
9327ec681f3Smrg */ 9337ec681f3Smrg for (int i = 0; i < 2; ++i) 9347ec681f3Smrg coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), ""); 9357ec681f3Smrg 9367ec681f3Smrg if (is_array) { 9377ec681f3Smrg /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ 9387ec681f3Smrg /* coords_arg.w component - array_index for cube arrays */ 9397ec681f3Smrg coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]); 9407ec681f3Smrg } 9417ec681f3Smrg 9427ec681f3Smrg memcpy(coords_arg, coords, sizeof(coords)); 9437ec681f3Smrg} 9447ec681f3Smrg 9457ec681f3SmrgLLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan, 9467ec681f3Smrg LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i, 9477ec681f3Smrg LLVMValueRef j) 9487ec681f3Smrg{ 9497ec681f3Smrg LLVMValueRef args[5]; 9507ec681f3Smrg LLVMValueRef p1; 9517ec681f3Smrg 9527ec681f3Smrg args[0] = i; 9537ec681f3Smrg args[1] = llvm_chan; 9547ec681f3Smrg args[2] = attr_number; 9557ec681f3Smrg args[3] = params; 9567ec681f3Smrg 9577ec681f3Smrg p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1", ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); 9587ec681f3Smrg 9597ec681f3Smrg args[0] = p1; 9607ec681f3Smrg args[1] = j; 9617ec681f3Smrg args[2] = llvm_chan; 9627ec681f3Smrg args[3] = attr_number; 9637ec681f3Smrg args[4] = params; 9647ec681f3Smrg 9657ec681f3Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2", ctx->f32, args, 5, 9667ec681f3Smrg AC_FUNC_ATTR_READNONE); 9677ec681f3Smrg} 9687ec681f3Smrg 9697ec681f3SmrgLLVMValueRef ac_build_fs_interp_f16(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan, 9707ec681f3Smrg LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i, 9717ec681f3Smrg LLVMValueRef j, bool high_16bits) 9727ec681f3Smrg{ 9737ec681f3Smrg LLVMValueRef args[6]; 9747ec681f3Smrg LLVMValueRef p1; 9757ec681f3Smrg 9767ec681f3Smrg args[0] = i; 9777ec681f3Smrg args[1] = llvm_chan; 9787ec681f3Smrg args[2] = attr_number; 9797ec681f3Smrg 
args[3] = high_16bits ? ctx->i1true : ctx->i1false; 9807ec681f3Smrg args[4] = params; 9817ec681f3Smrg 9827ec681f3Smrg p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", ctx->f32, args, 5, 9837ec681f3Smrg AC_FUNC_ATTR_READNONE); 9847ec681f3Smrg 9857ec681f3Smrg args[0] = p1; 9867ec681f3Smrg args[1] = j; 9877ec681f3Smrg args[2] = llvm_chan; 9887ec681f3Smrg args[3] = attr_number; 9897ec681f3Smrg args[4] = high_16bits ? ctx->i1true : ctx->i1false; 9907ec681f3Smrg args[5] = params; 9917ec681f3Smrg 9927ec681f3Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", ctx->f16, args, 6, 9937ec681f3Smrg AC_FUNC_ATTR_READNONE); 9947ec681f3Smrg} 9957ec681f3Smrg 9967ec681f3SmrgLLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter, 9977ec681f3Smrg LLVMValueRef llvm_chan, LLVMValueRef attr_number, 9987ec681f3Smrg LLVMValueRef params) 9997ec681f3Smrg{ 10007ec681f3Smrg LLVMValueRef args[4]; 10017ec681f3Smrg 10027ec681f3Smrg args[0] = parameter; 10037ec681f3Smrg args[1] = llvm_chan; 10047ec681f3Smrg args[2] = attr_number; 10057ec681f3Smrg args[3] = params; 10067ec681f3Smrg 10077ec681f3Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov", ctx->f32, args, 4, 10087ec681f3Smrg AC_FUNC_ATTR_READNONE); 10097ec681f3Smrg} 10107ec681f3Smrg 10117ec681f3SmrgLLVMValueRef ac_build_gep_ptr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, 10127ec681f3Smrg LLVMValueRef index) 10137ec681f3Smrg{ 10147ec681f3Smrg return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, ""); 10157ec681f3Smrg} 10167ec681f3Smrg 10177ec681f3SmrgLLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index) 10187ec681f3Smrg{ 10197ec681f3Smrg LLVMValueRef indices[2] = { 10207ec681f3Smrg ctx->i32_0, 10217ec681f3Smrg index, 10227ec681f3Smrg }; 10237ec681f3Smrg return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, ""); 10247ec681f3Smrg} 10257ec681f3Smrg 10267ec681f3SmrgLLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, 
LLVMValueRef ptr, LLVMValueRef index) 10277ec681f3Smrg{ 10287ec681f3Smrg return LLVMBuildPointerCast(ctx->builder, LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""), 10297ec681f3Smrg LLVMTypeOf(ptr), ""); 10307ec681f3Smrg} 10317ec681f3Smrg 10327ec681f3Smrgvoid ac_build_indexed_store(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index, 10337ec681f3Smrg LLVMValueRef value) 10347ec681f3Smrg{ 10357ec681f3Smrg LLVMBuildStore(ctx->builder, value, ac_build_gep0(ctx, base_ptr, index)); 10367ec681f3Smrg} 10377ec681f3Smrg 10387ec681f3Smrg/** 10397ec681f3Smrg * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad. 10407ec681f3Smrg * It's equivalent to doing a load from &base_ptr[index]. 10417ec681f3Smrg * 10427ec681f3Smrg * \param base_ptr Where the array starts. 10437ec681f3Smrg * \param index The element index into the array. 10447ec681f3Smrg * \param uniform Whether the base_ptr and index can be assumed to be 10457ec681f3Smrg * dynamically uniform (i.e. load to an SGPR) 10467ec681f3Smrg * \param invariant Whether the load is invariant (no other opcodes affect it) 10477ec681f3Smrg * \param no_unsigned_wraparound 10487ec681f3Smrg * For all possible re-associations and re-distributions of an expression 10497ec681f3Smrg * "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs 10507ec681f3Smrg * without inbounds in base_ptr), this parameter is true if "addr + offset" 10517ec681f3Smrg * does not result in an unsigned integer wraparound. This is used for 10527ec681f3Smrg * optimal code generation of 32-bit pointer arithmetic. 10537ec681f3Smrg * 10547ec681f3Smrg * For example, a 32-bit immediate offset that causes a 32-bit unsigned 10557ec681f3Smrg * integer wraparound can't be an imm offset in s_load_dword, because 10567ec681f3Smrg * the instruction performs "addr + offset" in 64 bits. 
10577ec681f3Smrg * 10587ec681f3Smrg * Expected usage for bindless textures by chaining GEPs: 10597ec681f3Smrg * // possible unsigned wraparound, don't use InBounds: 10607ec681f3Smrg * ptr1 = LLVMBuildGEP(base_ptr, index); 10617ec681f3Smrg * image = load(ptr1); // becomes "s_load ptr1, 0" 10627ec681f3Smrg * 10637ec681f3Smrg * ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize); 10647ec681f3Smrg * sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds 10657ec681f3Smrg */ 10667ec681f3Smrgstatic LLVMValueRef ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, 10677ec681f3Smrg LLVMValueRef index, bool uniform, bool invariant, 10687ec681f3Smrg bool no_unsigned_wraparound) 10697ec681f3Smrg{ 10707ec681f3Smrg LLVMValueRef pointer, result; 10717ec681f3Smrg 10727ec681f3Smrg if (no_unsigned_wraparound && 10737ec681f3Smrg LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT) 10747ec681f3Smrg pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, &index, 1, ""); 10757ec681f3Smrg else 10767ec681f3Smrg pointer = LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, ""); 10777ec681f3Smrg 10787ec681f3Smrg if (uniform) 10797ec681f3Smrg LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); 10807ec681f3Smrg result = LLVMBuildLoad(ctx->builder, pointer, ""); 10817ec681f3Smrg if (invariant) 10827ec681f3Smrg LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); 10837ec681f3Smrg LLVMSetAlignment(result, 4); 10847ec681f3Smrg return result; 10857ec681f3Smrg} 10867ec681f3Smrg 10877ec681f3SmrgLLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index) 10887ec681f3Smrg{ 10897ec681f3Smrg return ac_build_load_custom(ctx, base_ptr, index, false, false, false); 10907ec681f3Smrg} 10917ec681f3Smrg 10927ec681f3SmrgLLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, 10937ec681f3Smrg LLVMValueRef index) 10947ec681f3Smrg{ 10957ec681f3Smrg return 
ac_build_load_custom(ctx, base_ptr, index, false, true, false); 10967ec681f3Smrg} 10977ec681f3Smrg 10987ec681f3Smrg/* This assumes that there is no unsigned integer wraparound during the address 10997ec681f3Smrg * computation, excluding all GEPs within base_ptr. */ 11007ec681f3SmrgLLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, 11017ec681f3Smrg LLVMValueRef index) 11027ec681f3Smrg{ 11037ec681f3Smrg return ac_build_load_custom(ctx, base_ptr, index, true, true, true); 11047ec681f3Smrg} 11057ec681f3Smrg 11067ec681f3Smrg/* See ac_build_load_custom() documentation. */ 11077ec681f3SmrgLLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx, 11087ec681f3Smrg LLVMValueRef base_ptr, LLVMValueRef index) 11097ec681f3Smrg{ 11107ec681f3Smrg return ac_build_load_custom(ctx, base_ptr, index, true, true, false); 11117ec681f3Smrg} 11127ec681f3Smrg 11137ec681f3Smrgstatic unsigned get_load_cache_policy(struct ac_llvm_context *ctx, unsigned cache_policy) 11147ec681f3Smrg{ 11157ec681f3Smrg return cache_policy | (ctx->chip_class >= GFX10 && cache_policy & ac_glc ? ac_dlc : 0); 11167ec681f3Smrg} 11177ec681f3Smrg 11187ec681f3Smrgstatic void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc, 11197ec681f3Smrg LLVMValueRef data, LLVMValueRef vindex, 11207ec681f3Smrg LLVMValueRef voffset, LLVMValueRef soffset, 11217ec681f3Smrg unsigned cache_policy, bool use_format, bool structurized) 11227ec681f3Smrg{ 11237ec681f3Smrg LLVMValueRef args[6]; 11247ec681f3Smrg int idx = 0; 11257ec681f3Smrg args[idx++] = data; 11267ec681f3Smrg args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); 11277ec681f3Smrg if (structurized) 11287ec681f3Smrg args[idx++] = vindex ? vindex : ctx->i32_0; 11297ec681f3Smrg args[idx++] = voffset ? voffset : ctx->i32_0; 11307ec681f3Smrg args[idx++] = soffset ? 
soffset : ctx->i32_0; 11317ec681f3Smrg args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0); 11327ec681f3Smrg const char *indexing_kind = structurized ? "struct" : "raw"; 11337ec681f3Smrg char name[256], type_name[8]; 11347ec681f3Smrg 11357ec681f3Smrg ac_build_type_name_for_intr(LLVMTypeOf(data), type_name, sizeof(type_name)); 11367ec681f3Smrg 11377ec681f3Smrg if (use_format) { 11387ec681f3Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s", indexing_kind, 11397ec681f3Smrg type_name); 11407ec681f3Smrg } else { 11417ec681f3Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s", indexing_kind, type_name); 11427ec681f3Smrg } 11437ec681f3Smrg 11447ec681f3Smrg ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); 11457ec681f3Smrg} 11467ec681f3Smrg 11477ec681f3Smrgvoid ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data, 11487ec681f3Smrg LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy) 11497ec681f3Smrg{ 11507ec681f3Smrg ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, cache_policy, true, true); 11517ec681f3Smrg} 11527ec681f3Smrg 11537ec681f3Smrg/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. 11547ec681f3Smrg * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), 11557ec681f3Smrg * or v4i32 (num_channels=3,4). 11567ec681f3Smrg */ 11577ec681f3Smrgvoid ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, 11587ec681f3Smrg unsigned num_channels, LLVMValueRef voffset, LLVMValueRef soffset, 11597ec681f3Smrg unsigned inst_offset, unsigned cache_policy) 11607ec681f3Smrg{ 11617ec681f3Smrg /* Split 3 channel stores. 
*/ 11627ec681f3Smrg if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) { 11637ec681f3Smrg LLVMValueRef v[3], v01; 11647ec681f3Smrg 11657ec681f3Smrg for (int i = 0; i < 3; i++) { 11667ec681f3Smrg v[i] = LLVMBuildExtractElement(ctx->builder, vdata, LLVMConstInt(ctx->i32, i, 0), ""); 11677ec681f3Smrg } 11687ec681f3Smrg v01 = ac_build_gather_values(ctx, v, 2); 11697ec681f3Smrg 11707ec681f3Smrg ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset, soffset, inst_offset, cache_policy); 11717ec681f3Smrg ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset, soffset, inst_offset + 8, 11727ec681f3Smrg cache_policy); 11737ec681f3Smrg return; 11747ec681f3Smrg } 11757ec681f3Smrg 11767ec681f3Smrg /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset 11777ec681f3Smrg * (voffset is swizzled, but soffset isn't swizzled). 11787ec681f3Smrg * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter. 11797ec681f3Smrg */ 11807ec681f3Smrg if (!(cache_policy & ac_swizzled)) { 11817ec681f3Smrg LLVMValueRef offset = soffset; 11827ec681f3Smrg 11837ec681f3Smrg if (inst_offset) 11847ec681f3Smrg offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, inst_offset, 0), ""); 11857ec681f3Smrg 11867ec681f3Smrg ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), ctx->i32_0, voffset, offset, 11877ec681f3Smrg cache_policy, false, false); 11887ec681f3Smrg return; 11897ec681f3Smrg } 11907ec681f3Smrg 11917ec681f3Smrg static const unsigned dfmts[] = {V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32, 11927ec681f3Smrg V_008F0C_BUF_DATA_FORMAT_32_32_32, 11937ec681f3Smrg V_008F0C_BUF_DATA_FORMAT_32_32_32_32}; 11947ec681f3Smrg unsigned dfmt = dfmts[num_channels - 1]; 11957ec681f3Smrg unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; 11967ec681f3Smrg LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0); 11977ec681f3Smrg 11987ec681f3Smrg ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, immoffset, num_channels, 
dfmt, 11997ec681f3Smrg nfmt, cache_policy); 12007ec681f3Smrg} 12017ec681f3Smrg 12027ec681f3Smrgstatic LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc, 12037ec681f3Smrg LLVMValueRef vindex, LLVMValueRef voffset, 12047ec681f3Smrg LLVMValueRef soffset, unsigned num_channels, 12057ec681f3Smrg LLVMTypeRef channel_type, unsigned cache_policy, 12067ec681f3Smrg bool can_speculate, bool use_format, 12077ec681f3Smrg bool structurized) 12087ec681f3Smrg{ 12097ec681f3Smrg LLVMValueRef args[5]; 12107ec681f3Smrg int idx = 0; 12117ec681f3Smrg args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); 12127ec681f3Smrg if (structurized) 12137ec681f3Smrg args[idx++] = vindex ? vindex : ctx->i32_0; 12147ec681f3Smrg args[idx++] = voffset ? voffset : ctx->i32_0; 12157ec681f3Smrg args[idx++] = soffset ? soffset : ctx->i32_0; 12167ec681f3Smrg args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0); 12177ec681f3Smrg unsigned func = 12187ec681f3Smrg !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels; 12197ec681f3Smrg const char *indexing_kind = structurized ? "struct" : "raw"; 12207ec681f3Smrg char name[256], type_name[8]; 12217ec681f3Smrg 12227ec681f3Smrg /* D16 is only supported on gfx8+ */ 12237ec681f3Smrg assert(!use_format || (channel_type != ctx->f16 && channel_type != ctx->i16) || 12247ec681f3Smrg ctx->chip_class >= GFX8); 12257ec681f3Smrg 12267ec681f3Smrg LLVMTypeRef type = func > 1 ? 
LLVMVectorType(channel_type, func) : channel_type; 12277ec681f3Smrg ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); 12287ec681f3Smrg 12297ec681f3Smrg if (use_format) { 12307ec681f3Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s", indexing_kind, 12317ec681f3Smrg type_name); 12327ec681f3Smrg } else { 12337ec681f3Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s", indexing_kind, type_name); 12347ec681f3Smrg } 12357ec681f3Smrg 12367ec681f3Smrg return ac_build_intrinsic(ctx, name, type, args, idx, ac_get_load_intr_attribs(can_speculate)); 12377ec681f3Smrg} 12387ec681f3Smrg 12397ec681f3SmrgLLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels, 12407ec681f3Smrg LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, 12417ec681f3Smrg unsigned inst_offset, LLVMTypeRef channel_type, 12427ec681f3Smrg unsigned cache_policy, bool can_speculate, bool allow_smem) 12437ec681f3Smrg{ 12447ec681f3Smrg LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0); 12457ec681f3Smrg if (voffset) 12467ec681f3Smrg offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); 12477ec681f3Smrg if (soffset) 12487ec681f3Smrg offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); 12497ec681f3Smrg 12507ec681f3Smrg if (allow_smem && !(cache_policy & ac_slc) && 12517ec681f3Smrg (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) { 12527ec681f3Smrg assert(vindex == NULL); 12537ec681f3Smrg 12547ec681f3Smrg LLVMValueRef result[8]; 12557ec681f3Smrg 12567ec681f3Smrg for (int i = 0; i < num_channels; i++) { 12577ec681f3Smrg if (i) { 12587ec681f3Smrg offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, 4, 0), ""); 12597ec681f3Smrg } 12607ec681f3Smrg LLVMValueRef args[3] = { 12617ec681f3Smrg rsrc, 12627ec681f3Smrg offset, 12637ec681f3Smrg LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0), 12647ec681f3Smrg }; 12657ec681f3Smrg result[i] = ac_build_intrinsic(ctx, 
"llvm.amdgcn.s.buffer.load.f32", ctx->f32, args, 3, 12667ec681f3Smrg AC_FUNC_ATTR_READNONE); 12677ec681f3Smrg } 12687ec681f3Smrg if (num_channels == 1) 12697ec681f3Smrg return result[0]; 12707ec681f3Smrg 12717ec681f3Smrg if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) 12727ec681f3Smrg result[num_channels++] = LLVMGetUndef(ctx->f32); 12737ec681f3Smrg return ac_build_gather_values(ctx, result, num_channels); 12747ec681f3Smrg } 12757ec681f3Smrg 12767ec681f3Smrg return ac_build_buffer_load_common(ctx, rsrc, vindex, offset, ctx->i32_0, num_channels, 12777ec681f3Smrg channel_type, cache_policy, can_speculate, false, false); 12787ec681f3Smrg} 12797ec681f3Smrg 12807ec681f3SmrgLLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, 12817ec681f3Smrg LLVMValueRef vindex, LLVMValueRef voffset, 12827ec681f3Smrg unsigned num_channels, unsigned cache_policy, 12837ec681f3Smrg bool can_speculate, bool d16, bool tfe) 12847ec681f3Smrg{ 12857ec681f3Smrg if (tfe) { 12867ec681f3Smrg assert(!d16); 12877ec681f3Smrg 12887ec681f3Smrg char code[256]; 12897ec681f3Smrg /* The definition in the assembly and the one in the constraint string 12907ec681f3Smrg * differs because of an assembler bug. 12917ec681f3Smrg */ 12927ec681f3Smrg snprintf(code, sizeof(code), 12937ec681f3Smrg "v_mov_b32 v0, 0\n" 12947ec681f3Smrg "v_mov_b32 v1, 0\n" 12957ec681f3Smrg "v_mov_b32 v2, 0\n" 12967ec681f3Smrg "v_mov_b32 v3, 0\n" 12977ec681f3Smrg "v_mov_b32 v4, 0\n" 12987ec681f3Smrg "buffer_load_format_xyzw v[0:3], $1, $2, 0, idxen offen %s %s tfe %s\n" 12997ec681f3Smrg "s_waitcnt vmcnt(0)", 13007ec681f3Smrg cache_policy & ac_glc ? "glc" : "", 13017ec681f3Smrg cache_policy & ac_slc ? "slc" : "", 13027ec681f3Smrg cache_policy & ac_dlc ? 
"dlc" : ""); 13037ec681f3Smrg 13047ec681f3Smrg LLVMTypeRef param_types[] = {ctx->v2i32, ctx->v4i32}; 13057ec681f3Smrg LLVMTypeRef calltype = LLVMFunctionType(LLVMVectorType(ctx->f32, 5), param_types, 2, false); 13067ec681f3Smrg LLVMValueRef inlineasm = LLVMConstInlineAsm(calltype, code, "=&{v[0:4]},v,s", false, false); 13077ec681f3Smrg 13087ec681f3Smrg LLVMValueRef addr_comp[2] = {vindex ? vindex : ctx->i32_0, 13097ec681f3Smrg voffset ? voffset : ctx->i32_0}; 13107ec681f3Smrg 13117ec681f3Smrg LLVMValueRef args[] = {ac_build_gather_values(ctx, addr_comp, 2), 13127ec681f3Smrg LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "")}; 13137ec681f3Smrg LLVMValueRef res = LLVMBuildCall(ctx->builder, inlineasm, args, 2, ""); 13147ec681f3Smrg 13157ec681f3Smrg return ac_build_concat(ctx, ac_trim_vector(ctx, res, num_channels), 13167ec681f3Smrg ac_llvm_extract_elem(ctx, res, 4)); 13177ec681f3Smrg } 13187ec681f3Smrg 13197ec681f3Smrg return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, num_channels, 13207ec681f3Smrg d16 ? ctx->f16 : ctx->f32, cache_policy, can_speculate, true, 13217ec681f3Smrg true); 13227ec681f3Smrg} 13237ec681f3Smrg 13247ec681f3Smrgstatic LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, 13257ec681f3Smrg LLVMValueRef vindex, LLVMValueRef voffset, 13267ec681f3Smrg LLVMValueRef soffset, LLVMValueRef immoffset, 13277ec681f3Smrg unsigned num_channels, unsigned dfmt, unsigned nfmt, 13287ec681f3Smrg unsigned cache_policy, bool can_speculate, 13297ec681f3Smrg bool structurized) 13307ec681f3Smrg{ 13317ec681f3Smrg voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); 13327ec681f3Smrg 13337ec681f3Smrg LLVMValueRef args[6]; 13347ec681f3Smrg int idx = 0; 13357ec681f3Smrg args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); 13367ec681f3Smrg if (structurized) 13377ec681f3Smrg args[idx++] = vindex ? vindex : ctx->i32_0; 13387ec681f3Smrg args[idx++] = voffset ? 
voffset : ctx->i32_0; 13397ec681f3Smrg args[idx++] = soffset ? soffset : ctx->i32_0; 13407ec681f3Smrg args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0); 13417ec681f3Smrg args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0); 13427ec681f3Smrg unsigned func = 13437ec681f3Smrg !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels; 13447ec681f3Smrg const char *indexing_kind = structurized ? "struct" : "raw"; 13457ec681f3Smrg char name[256], type_name[8]; 13467ec681f3Smrg 13477ec681f3Smrg LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32; 13487ec681f3Smrg ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); 13497ec681f3Smrg 13507ec681f3Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s", indexing_kind, type_name); 13517ec681f3Smrg 13527ec681f3Smrg return ac_build_intrinsic(ctx, name, type, args, idx, ac_get_load_intr_attribs(can_speculate)); 13537ec681f3Smrg} 13547ec681f3Smrg 13557ec681f3SmrgLLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, 13567ec681f3Smrg LLVMValueRef vindex, LLVMValueRef voffset, 13577ec681f3Smrg LLVMValueRef soffset, LLVMValueRef immoffset, 13587ec681f3Smrg unsigned num_channels, unsigned dfmt, unsigned nfmt, 13597ec681f3Smrg unsigned cache_policy, bool can_speculate) 13607ec681f3Smrg{ 13617ec681f3Smrg return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset, immoffset, num_channels, dfmt, 13627ec681f3Smrg nfmt, cache_policy, can_speculate, true); 13637ec681f3Smrg} 13647ec681f3Smrg 13657ec681f3SmrgLLVMValueRef ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc, 13667ec681f3Smrg LLVMValueRef voffset, LLVMValueRef soffset, 13677ec681f3Smrg LLVMValueRef immoffset, unsigned cache_policy) 13687ec681f3Smrg{ 13697ec681f3Smrg voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); 13707ec681f3Smrg 13717ec681f3Smrg return 
ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i16, 13727ec681f3Smrg cache_policy, false, false, false); 13737ec681f3Smrg} 13747ec681f3Smrg 13757ec681f3SmrgLLVMValueRef ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, 13767ec681f3Smrg LLVMValueRef voffset, LLVMValueRef soffset, 13777ec681f3Smrg LLVMValueRef immoffset, unsigned cache_policy) 13787ec681f3Smrg{ 13797ec681f3Smrg voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); 13807ec681f3Smrg 13817ec681f3Smrg return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i8, cache_policy, 13827ec681f3Smrg false, false, false); 13837ec681f3Smrg} 13847ec681f3Smrg 13857ec681f3Smrg/** 13867ec681f3Smrg * Convert an 11- or 10-bit unsigned floating point number to an f32. 13877ec681f3Smrg * 13887ec681f3Smrg * The input exponent is expected to be biased analogous to IEEE-754, i.e. by 13897ec681f3Smrg * 2^(exp_bits-1) - 1 (as defined in OpenGL and other graphics APIs). 13907ec681f3Smrg */ 13917ec681f3Smrgstatic LLVMValueRef ac_ufN_to_float(struct ac_llvm_context *ctx, LLVMValueRef src, 13927ec681f3Smrg unsigned exp_bits, unsigned mant_bits) 13937ec681f3Smrg{ 13947ec681f3Smrg assert(LLVMTypeOf(src) == ctx->i32); 13957ec681f3Smrg 13967ec681f3Smrg LLVMValueRef tmp; 13977ec681f3Smrg LLVMValueRef mantissa; 13987ec681f3Smrg mantissa = 13997ec681f3Smrg LLVMBuildAnd(ctx->builder, src, LLVMConstInt(ctx->i32, (1 << mant_bits) - 1, false), ""); 14007ec681f3Smrg 14017ec681f3Smrg /* Converting normal numbers is just a shift + correcting the exponent bias */ 14027ec681f3Smrg unsigned normal_shift = 23 - mant_bits; 14037ec681f3Smrg unsigned bias_shift = 127 - ((1 << (exp_bits - 1)) - 1); 14047ec681f3Smrg LLVMValueRef shifted, normal; 14057ec681f3Smrg 14067ec681f3Smrg shifted = LLVMBuildShl(ctx->builder, src, LLVMConstInt(ctx->i32, normal_shift, false), ""); 14077ec681f3Smrg normal = 14087ec681f3Smrg LLVMBuildAdd(ctx->builder, shifted, LLVMConstInt(ctx->i32, 
bias_shift << 23, false), ""); 14097ec681f3Smrg 14107ec681f3Smrg /* Converting nan/inf numbers is the same, but with a different exponent update */ 14117ec681f3Smrg LLVMValueRef naninf; 14127ec681f3Smrg naninf = LLVMBuildOr(ctx->builder, normal, LLVMConstInt(ctx->i32, 0xff << 23, false), ""); 14137ec681f3Smrg 14147ec681f3Smrg /* Converting denormals is the complex case: determine the leading zeros of the 14157ec681f3Smrg * mantissa to obtain the correct shift for the mantissa and exponent correction. 14167ec681f3Smrg */ 14177ec681f3Smrg LLVMValueRef denormal; 14187ec681f3Smrg LLVMValueRef params[2] = { 14197ec681f3Smrg mantissa, ctx->i1true, /* result can be undef when arg is 0 */ 14207ec681f3Smrg }; 14217ec681f3Smrg LLVMValueRef ctlz = 14227ec681f3Smrg ac_build_intrinsic(ctx, "llvm.ctlz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE); 14237ec681f3Smrg 14247ec681f3Smrg /* Shift such that the leading 1 ends up as the LSB of the exponent field. */ 14257ec681f3Smrg tmp = LLVMBuildSub(ctx->builder, ctlz, LLVMConstInt(ctx->i32, 8, false), ""); 14267ec681f3Smrg denormal = LLVMBuildShl(ctx->builder, mantissa, tmp, ""); 14277ec681f3Smrg 14287ec681f3Smrg unsigned denormal_exp = bias_shift + (32 - mant_bits) - 1; 14297ec681f3Smrg tmp = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, denormal_exp, false), ctlz, ""); 14307ec681f3Smrg tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(ctx->i32, 23, false), ""); 14317ec681f3Smrg denormal = LLVMBuildAdd(ctx->builder, denormal, tmp, ""); 14327ec681f3Smrg 14337ec681f3Smrg /* Select the final result. 
*/ 14347ec681f3Smrg LLVMValueRef result; 14357ec681f3Smrg 14367ec681f3Smrg tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src, 14377ec681f3Smrg LLVMConstInt(ctx->i32, ((1ULL << exp_bits) - 1) << mant_bits, false), ""); 14387ec681f3Smrg result = LLVMBuildSelect(ctx->builder, tmp, naninf, normal, ""); 14397ec681f3Smrg 14407ec681f3Smrg tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src, 14417ec681f3Smrg LLVMConstInt(ctx->i32, 1ULL << mant_bits, false), ""); 14427ec681f3Smrg result = LLVMBuildSelect(ctx->builder, tmp, result, denormal, ""); 14437ec681f3Smrg 14447ec681f3Smrg tmp = LLVMBuildICmp(ctx->builder, LLVMIntNE, src, ctx->i32_0, ""); 14457ec681f3Smrg result = LLVMBuildSelect(ctx->builder, tmp, result, ctx->i32_0, ""); 14467ec681f3Smrg 14477ec681f3Smrg return ac_to_float(ctx, result); 14487ec681f3Smrg} 14497ec681f3Smrg 14507ec681f3Smrg/** 14517ec681f3Smrg * Generate a fully general open coded buffer format fetch with all required 14527ec681f3Smrg * fixups suitable for vertex fetch, using non-format buffer loads. 14537ec681f3Smrg * 14547ec681f3Smrg * Some combinations of argument values have special interpretations: 14557ec681f3Smrg * - size = 8 bytes, format = fixed indicates PIPE_FORMAT_R11G11B10_FLOAT 14567ec681f3Smrg * - size = 8 bytes, format != {float,fixed} indicates a 2_10_10_10 data format 14577ec681f3Smrg * 14587ec681f3Smrg * \param log_size log(size of channel in bytes) 14597ec681f3Smrg * \param num_channels number of channels (1 to 4) 14607ec681f3Smrg * \param format AC_FETCH_FORMAT_xxx value 14617ec681f3Smrg * \param reverse whether XYZ channels are reversed 14627ec681f3Smrg * \param known_aligned whether the source is known to be aligned to hardware's 14637ec681f3Smrg * effective element size for loading the given format 14647ec681f3Smrg * (note: this means dword alignment for 8_8_8_8, 16_16, etc.) 
14657ec681f3Smrg * \param rsrc buffer resource descriptor 14667ec681f3Smrg * \return the resulting vector of floats or integers bitcast to <4 x i32> 14677ec681f3Smrg */ 14687ec681f3SmrgLLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigned log_size, 14697ec681f3Smrg unsigned num_channels, unsigned format, bool reverse, 14707ec681f3Smrg bool known_aligned, LLVMValueRef rsrc, 14717ec681f3Smrg LLVMValueRef vindex, LLVMValueRef voffset, 14727ec681f3Smrg LLVMValueRef soffset, unsigned cache_policy, 14737ec681f3Smrg bool can_speculate) 14747ec681f3Smrg{ 14757ec681f3Smrg LLVMValueRef tmp; 14767ec681f3Smrg unsigned load_log_size = log_size; 14777ec681f3Smrg unsigned load_num_channels = num_channels; 14787ec681f3Smrg if (log_size == 3) { 14797ec681f3Smrg load_log_size = 2; 14807ec681f3Smrg if (format == AC_FETCH_FORMAT_FLOAT) { 14817ec681f3Smrg load_num_channels = 2 * num_channels; 14827ec681f3Smrg } else { 14837ec681f3Smrg load_num_channels = 1; /* 10_11_11 or 2_10_10_10 */ 14847ec681f3Smrg } 14857ec681f3Smrg } 14867ec681f3Smrg 14877ec681f3Smrg int log_recombine = 0; 14887ec681f3Smrg if ((ctx->chip_class == GFX6 || ctx->chip_class >= GFX10) && !known_aligned) { 14897ec681f3Smrg /* Avoid alignment restrictions by loading one byte at a time. 
*/ 14907ec681f3Smrg load_num_channels <<= load_log_size; 14917ec681f3Smrg log_recombine = load_log_size; 14927ec681f3Smrg load_log_size = 0; 14937ec681f3Smrg } else if (load_num_channels == 2 || load_num_channels == 4) { 14947ec681f3Smrg log_recombine = -util_logbase2(load_num_channels); 14957ec681f3Smrg load_num_channels = 1; 14967ec681f3Smrg load_log_size += -log_recombine; 14977ec681f3Smrg } 14987ec681f3Smrg 14997ec681f3Smrg LLVMValueRef loads[32]; /* up to 32 bytes */ 15007ec681f3Smrg for (unsigned i = 0; i < load_num_channels; ++i) { 15017ec681f3Smrg tmp = 15027ec681f3Smrg LLVMBuildAdd(ctx->builder, soffset, LLVMConstInt(ctx->i32, i << load_log_size, false), ""); 15037ec681f3Smrg LLVMTypeRef channel_type = 15047ec681f3Smrg load_log_size == 0 ? ctx->i8 : load_log_size == 1 ? ctx->i16 : ctx->i32; 15057ec681f3Smrg unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2); 15067ec681f3Smrg loads[i] = 15077ec681f3Smrg ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, tmp, num_channels, channel_type, 15087ec681f3Smrg cache_policy, can_speculate, false, true); 15097ec681f3Smrg if (load_log_size >= 2) 15107ec681f3Smrg loads[i] = ac_to_integer(ctx, loads[i]); 15117ec681f3Smrg } 15127ec681f3Smrg 15137ec681f3Smrg if (log_recombine > 0) { 15147ec681f3Smrg /* Recombine bytes if necessary (GFX6 only) */ 15157ec681f3Smrg LLVMTypeRef dst_type = log_recombine == 2 ? 
ctx->i32 : ctx->i16; 15167ec681f3Smrg 15177ec681f3Smrg for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) { 15187ec681f3Smrg LLVMValueRef accum = NULL; 15197ec681f3Smrg for (unsigned i = 0; i < (1 << log_recombine); ++i, ++src) { 15207ec681f3Smrg tmp = LLVMBuildZExt(ctx->builder, loads[src], dst_type, ""); 15217ec681f3Smrg if (i == 0) { 15227ec681f3Smrg accum = tmp; 15237ec681f3Smrg } else { 15247ec681f3Smrg tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(dst_type, 8 * i, false), ""); 15257ec681f3Smrg accum = LLVMBuildOr(ctx->builder, accum, tmp, ""); 15267ec681f3Smrg } 15277ec681f3Smrg } 15287ec681f3Smrg loads[dst] = accum; 15297ec681f3Smrg } 15307ec681f3Smrg } else if (log_recombine < 0) { 15317ec681f3Smrg /* Split vectors of dwords */ 15327ec681f3Smrg if (load_log_size > 2) { 15337ec681f3Smrg assert(load_num_channels == 1); 15347ec681f3Smrg LLVMValueRef loaded = loads[0]; 15357ec681f3Smrg unsigned log_split = load_log_size - 2; 15367ec681f3Smrg log_recombine += log_split; 15377ec681f3Smrg load_num_channels = 1 << log_split; 15387ec681f3Smrg load_log_size = 2; 15397ec681f3Smrg for (unsigned i = 0; i < load_num_channels; ++i) { 15407ec681f3Smrg tmp = LLVMConstInt(ctx->i32, i, false); 15417ec681f3Smrg loads[i] = LLVMBuildExtractElement(ctx->builder, loaded, tmp, ""); 15427ec681f3Smrg } 15437ec681f3Smrg } 15447ec681f3Smrg 15457ec681f3Smrg /* Further split dwords and shorts if required */ 15467ec681f3Smrg if (log_recombine < 0) { 15477ec681f3Smrg for (unsigned src = load_num_channels, dst = load_num_channels << -log_recombine; src > 0; 15487ec681f3Smrg --src) { 15497ec681f3Smrg unsigned dst_bits = 1 << (3 + load_log_size + log_recombine); 15507ec681f3Smrg LLVMTypeRef dst_type = LLVMIntTypeInContext(ctx->context, dst_bits); 15517ec681f3Smrg LLVMValueRef loaded = loads[src - 1]; 15527ec681f3Smrg LLVMTypeRef loaded_type = LLVMTypeOf(loaded); 15537ec681f3Smrg for (unsigned i = 1 << -log_recombine; i > 0; --i, --dst) { 15547ec681f3Smrg tmp = 
LLVMConstInt(loaded_type, dst_bits * (i - 1), false); 15557ec681f3Smrg tmp = LLVMBuildLShr(ctx->builder, loaded, tmp, ""); 15567ec681f3Smrg loads[dst - 1] = LLVMBuildTrunc(ctx->builder, tmp, dst_type, ""); 15577ec681f3Smrg } 15587ec681f3Smrg } 15597ec681f3Smrg } 15607ec681f3Smrg } 15617ec681f3Smrg 15627ec681f3Smrg if (log_size == 3) { 15637ec681f3Smrg if (format == AC_FETCH_FORMAT_FLOAT) { 15647ec681f3Smrg for (unsigned i = 0; i < num_channels; ++i) { 15657ec681f3Smrg tmp = ac_build_gather_values(ctx, &loads[2 * i], 2); 15667ec681f3Smrg loads[i] = LLVMBuildBitCast(ctx->builder, tmp, ctx->f64, ""); 15677ec681f3Smrg } 15687ec681f3Smrg } else if (format == AC_FETCH_FORMAT_FIXED) { 15697ec681f3Smrg /* 10_11_11_FLOAT */ 15707ec681f3Smrg LLVMValueRef data = loads[0]; 15717ec681f3Smrg LLVMValueRef i32_2047 = LLVMConstInt(ctx->i32, 2047, false); 15727ec681f3Smrg LLVMValueRef r = LLVMBuildAnd(ctx->builder, data, i32_2047, ""); 15737ec681f3Smrg tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 11, false), ""); 15747ec681f3Smrg LLVMValueRef g = LLVMBuildAnd(ctx->builder, tmp, i32_2047, ""); 15757ec681f3Smrg LLVMValueRef b = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 22, false), ""); 15767ec681f3Smrg 15777ec681f3Smrg loads[0] = ac_to_integer(ctx, ac_ufN_to_float(ctx, r, 5, 6)); 15787ec681f3Smrg loads[1] = ac_to_integer(ctx, ac_ufN_to_float(ctx, g, 5, 6)); 15797ec681f3Smrg loads[2] = ac_to_integer(ctx, ac_ufN_to_float(ctx, b, 5, 5)); 15807ec681f3Smrg 15817ec681f3Smrg num_channels = 3; 15827ec681f3Smrg log_size = 2; 15837ec681f3Smrg format = AC_FETCH_FORMAT_FLOAT; 15847ec681f3Smrg } else { 15857ec681f3Smrg /* 2_10_10_10 data formats */ 15867ec681f3Smrg LLVMValueRef data = loads[0]; 15877ec681f3Smrg LLVMTypeRef i10 = LLVMIntTypeInContext(ctx->context, 10); 15887ec681f3Smrg LLVMTypeRef i2 = LLVMIntTypeInContext(ctx->context, 2); 15897ec681f3Smrg loads[0] = LLVMBuildTrunc(ctx->builder, data, i10, ""); 15907ec681f3Smrg tmp = LLVMBuildLShr(ctx->builder, 
data, LLVMConstInt(ctx->i32, 10, false), ""); 15917ec681f3Smrg loads[1] = LLVMBuildTrunc(ctx->builder, tmp, i10, ""); 15927ec681f3Smrg tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 20, false), ""); 15937ec681f3Smrg loads[2] = LLVMBuildTrunc(ctx->builder, tmp, i10, ""); 15947ec681f3Smrg tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 30, false), ""); 15957ec681f3Smrg loads[3] = LLVMBuildTrunc(ctx->builder, tmp, i2, ""); 15967ec681f3Smrg 15977ec681f3Smrg num_channels = 4; 15987ec681f3Smrg } 15997ec681f3Smrg } 16007ec681f3Smrg 16017ec681f3Smrg if (format == AC_FETCH_FORMAT_FLOAT) { 16027ec681f3Smrg if (log_size != 2) { 16037ec681f3Smrg for (unsigned chan = 0; chan < num_channels; ++chan) { 16047ec681f3Smrg tmp = ac_to_float(ctx, loads[chan]); 16057ec681f3Smrg if (log_size == 3) 16067ec681f3Smrg tmp = LLVMBuildFPTrunc(ctx->builder, tmp, ctx->f32, ""); 16077ec681f3Smrg else if (log_size == 1) 16087ec681f3Smrg tmp = LLVMBuildFPExt(ctx->builder, tmp, ctx->f32, ""); 16097ec681f3Smrg loads[chan] = ac_to_integer(ctx, tmp); 16107ec681f3Smrg } 16117ec681f3Smrg } 16127ec681f3Smrg } else if (format == AC_FETCH_FORMAT_UINT) { 16137ec681f3Smrg if (log_size != 2) { 16147ec681f3Smrg for (unsigned chan = 0; chan < num_channels; ++chan) 16157ec681f3Smrg loads[chan] = LLVMBuildZExt(ctx->builder, loads[chan], ctx->i32, ""); 16167ec681f3Smrg } 16177ec681f3Smrg } else if (format == AC_FETCH_FORMAT_SINT) { 16187ec681f3Smrg if (log_size != 2) { 16197ec681f3Smrg for (unsigned chan = 0; chan < num_channels; ++chan) 16207ec681f3Smrg loads[chan] = LLVMBuildSExt(ctx->builder, loads[chan], ctx->i32, ""); 16217ec681f3Smrg } 16227ec681f3Smrg } else { 16237ec681f3Smrg bool unsign = format == AC_FETCH_FORMAT_UNORM || format == AC_FETCH_FORMAT_USCALED || 16247ec681f3Smrg format == AC_FETCH_FORMAT_UINT; 16257ec681f3Smrg 16267ec681f3Smrg for (unsigned chan = 0; chan < num_channels; ++chan) { 16277ec681f3Smrg if (unsign) { 16287ec681f3Smrg tmp = 
LLVMBuildUIToFP(ctx->builder, loads[chan], ctx->f32, ""); 16297ec681f3Smrg } else { 16307ec681f3Smrg tmp = LLVMBuildSIToFP(ctx->builder, loads[chan], ctx->f32, ""); 16317ec681f3Smrg } 16327ec681f3Smrg 16337ec681f3Smrg LLVMValueRef scale = NULL; 16347ec681f3Smrg if (format == AC_FETCH_FORMAT_FIXED) { 16357ec681f3Smrg assert(log_size == 2); 16367ec681f3Smrg scale = LLVMConstReal(ctx->f32, 1.0 / 0x10000); 16377ec681f3Smrg } else if (format == AC_FETCH_FORMAT_UNORM) { 16387ec681f3Smrg unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan])); 16397ec681f3Smrg scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << bits) - 1)); 16407ec681f3Smrg } else if (format == AC_FETCH_FORMAT_SNORM) { 16417ec681f3Smrg unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan])); 16427ec681f3Smrg scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << (bits - 1)) - 1)); 16437ec681f3Smrg } 16447ec681f3Smrg if (scale) 16457ec681f3Smrg tmp = LLVMBuildFMul(ctx->builder, tmp, scale, ""); 16467ec681f3Smrg 16477ec681f3Smrg if (format == AC_FETCH_FORMAT_SNORM) { 16487ec681f3Smrg /* Clamp to [-1, 1] */ 16497ec681f3Smrg LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0); 16507ec681f3Smrg LLVMValueRef clamp = LLVMBuildFCmp(ctx->builder, LLVMRealULT, tmp, neg_one, ""); 16517ec681f3Smrg tmp = LLVMBuildSelect(ctx->builder, clamp, neg_one, tmp, ""); 16527ec681f3Smrg } 16537ec681f3Smrg 16547ec681f3Smrg loads[chan] = ac_to_integer(ctx, tmp); 16557ec681f3Smrg } 16567ec681f3Smrg } 16577ec681f3Smrg 16587ec681f3Smrg while (num_channels < 4) { 16597ec681f3Smrg if (format == AC_FETCH_FORMAT_UINT || format == AC_FETCH_FORMAT_SINT) { 16607ec681f3Smrg loads[num_channels] = num_channels == 3 ? ctx->i32_1 : ctx->i32_0; 16617ec681f3Smrg } else { 16627ec681f3Smrg loads[num_channels] = ac_to_integer(ctx, num_channels == 3 ? 
ctx->f32_1 : ctx->f32_0); 16637ec681f3Smrg } 16647ec681f3Smrg num_channels++; 16657ec681f3Smrg } 16667ec681f3Smrg 16677ec681f3Smrg if (reverse) { 16687ec681f3Smrg tmp = loads[0]; 16697ec681f3Smrg loads[0] = loads[2]; 16707ec681f3Smrg loads[2] = tmp; 16717ec681f3Smrg } 16727ec681f3Smrg 16737ec681f3Smrg return ac_build_gather_values(ctx, loads, 4); 16747ec681f3Smrg} 16757ec681f3Smrg 16767ec681f3Smrgstatic void ac_build_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, 16777ec681f3Smrg LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset, 16787ec681f3Smrg LLVMValueRef soffset, LLVMValueRef immoffset, 16797ec681f3Smrg unsigned num_channels, unsigned dfmt, unsigned nfmt, 16807ec681f3Smrg unsigned cache_policy, bool structurized) 16817ec681f3Smrg{ 16827ec681f3Smrg voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0, immoffset, ""); 16837ec681f3Smrg 16847ec681f3Smrg LLVMValueRef args[7]; 16857ec681f3Smrg int idx = 0; 16867ec681f3Smrg args[idx++] = vdata; 16877ec681f3Smrg args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); 16887ec681f3Smrg if (structurized) 16897ec681f3Smrg args[idx++] = vindex ? vindex : ctx->i32_0; 16907ec681f3Smrg args[idx++] = voffset ? voffset : ctx->i32_0; 16917ec681f3Smrg args[idx++] = soffset ? soffset : ctx->i32_0; 16927ec681f3Smrg args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0); 16937ec681f3Smrg args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0); 16947ec681f3Smrg unsigned func = 16957ec681f3Smrg !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels; 16967ec681f3Smrg const char *indexing_kind = structurized ? "struct" : "raw"; 16977ec681f3Smrg char name[256], type_name[8]; 16987ec681f3Smrg 16997ec681f3Smrg LLVMTypeRef type = func > 1 ? 
LLVMVectorType(ctx->i32, func) : ctx->i32; 17007ec681f3Smrg ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); 17017ec681f3Smrg 17027ec681f3Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s", indexing_kind, type_name); 17037ec681f3Smrg 17047ec681f3Smrg ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); 17057ec681f3Smrg} 17067ec681f3Smrg 17077ec681f3Smrgvoid ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, 17087ec681f3Smrg LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset, 17097ec681f3Smrg LLVMValueRef soffset, LLVMValueRef immoffset, 17107ec681f3Smrg unsigned num_channels, unsigned dfmt, unsigned nfmt, 17117ec681f3Smrg unsigned cache_policy) 17127ec681f3Smrg{ 17137ec681f3Smrg ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset, immoffset, num_channels, dfmt, 17147ec681f3Smrg nfmt, cache_policy, true); 17157ec681f3Smrg} 17167ec681f3Smrg 17177ec681f3Smrgvoid ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, 17187ec681f3Smrg LLVMValueRef voffset, LLVMValueRef soffset, LLVMValueRef immoffset, 17197ec681f3Smrg unsigned num_channels, unsigned dfmt, unsigned nfmt, 17207ec681f3Smrg unsigned cache_policy) 17217ec681f3Smrg{ 17227ec681f3Smrg ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset, immoffset, num_channels, dfmt, 17237ec681f3Smrg nfmt, cache_policy, false); 17247ec681f3Smrg} 17257ec681f3Smrg 17267ec681f3Smrgvoid ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc, 17277ec681f3Smrg LLVMValueRef vdata, LLVMValueRef voffset, LLVMValueRef soffset, 17287ec681f3Smrg unsigned cache_policy) 17297ec681f3Smrg{ 17307ec681f3Smrg vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, ""); 17317ec681f3Smrg 17327ec681f3Smrg ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false, 17337ec681f3Smrg false); 17347ec681f3Smrg} 17357ec681f3Smrg 
17367ec681f3Smrgvoid ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, 17377ec681f3Smrg LLVMValueRef voffset, LLVMValueRef soffset, unsigned cache_policy) 17387ec681f3Smrg{ 17397ec681f3Smrg vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, ""); 17407ec681f3Smrg 17417ec681f3Smrg ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false, 17427ec681f3Smrg false); 17437ec681f3Smrg} 17447ec681f3Smrg 17457ec681f3Smrg/** 17467ec681f3Smrg * Set range metadata on an instruction. This can only be used on load and 17477ec681f3Smrg * call instructions. If you know an instruction can only produce the values 17487ec681f3Smrg * 0, 1, 2, you would do set_range_metadata(value, 0, 3); 17497ec681f3Smrg * \p lo is the minimum value inclusive. 17507ec681f3Smrg * \p hi is the maximum value exclusive. 17517ec681f3Smrg */ 17527ec681f3Smrgvoid ac_set_range_metadata(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned lo, 17537ec681f3Smrg unsigned hi) 17547ec681f3Smrg{ 17557ec681f3Smrg LLVMValueRef range_md, md_args[2]; 17567ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(value); 17577ec681f3Smrg LLVMContextRef context = LLVMGetTypeContext(type); 17587ec681f3Smrg 17597ec681f3Smrg md_args[0] = LLVMConstInt(type, lo, false); 17607ec681f3Smrg md_args[1] = LLVMConstInt(type, hi, false); 17617ec681f3Smrg range_md = LLVMMDNodeInContext(context, md_args, 2); 17627ec681f3Smrg LLVMSetMetadata(value, ctx->range_md_kind, range_md); 17637ec681f3Smrg} 17647ec681f3Smrg 17657ec681f3SmrgLLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx) 17667ec681f3Smrg{ 17677ec681f3Smrg return ac_build_mbcnt(ctx, LLVMConstInt(ctx->iN_wavemask, ~0ull, 0)); 17687ec681f3Smrg} 17697ec681f3Smrg 17707ec681f3Smrg/* 17717ec681f3Smrg * AMD GCN implements derivatives using the local data store (LDS) 17727ec681f3Smrg * All writes to the LDS happen in all executing threads at 17737ec681f3Smrg * the same time. 
TID is the Thread ID for the current 17747ec681f3Smrg * thread and is a value between 0 and 63, representing 17757ec681f3Smrg * the thread's position in the wavefront. 17767ec681f3Smrg * 17777ec681f3Smrg * For the pixel shader threads are grouped into quads of four pixels. 17787ec681f3Smrg * The TIDs of the pixels of a quad are: 17797ec681f3Smrg * 17807ec681f3Smrg * +------+------+ 17817ec681f3Smrg * |4n + 0|4n + 1| 17827ec681f3Smrg * +------+------+ 17837ec681f3Smrg * |4n + 2|4n + 3| 17847ec681f3Smrg * +------+------+ 17857ec681f3Smrg * 17867ec681f3Smrg * So, masking the TID with 0xfffffffc yields the TID of the top left pixel 17877ec681f3Smrg * of the quad, masking with 0xfffffffd yields the TID of the top pixel of 17887ec681f3Smrg * the current pixel's column, and masking with 0xfffffffe yields the TID 17897ec681f3Smrg * of the left pixel of the current pixel's row. 17907ec681f3Smrg * 17917ec681f3Smrg * Adding 1 yields the TID of the pixel to the right of the left pixel, and 17927ec681f3Smrg * adding 2 yields the TID of the pixel below the top pixel. 
17937ec681f3Smrg */ 17947ec681f3SmrgLLVMValueRef ac_build_ddxy(struct ac_llvm_context *ctx, uint32_t mask, int idx, LLVMValueRef val) 17957ec681f3Smrg{ 17967ec681f3Smrg unsigned tl_lanes[4], trbl_lanes[4]; 17977ec681f3Smrg char name[32], type[8]; 17987ec681f3Smrg LLVMValueRef tl, trbl; 17997ec681f3Smrg LLVMTypeRef result_type; 18007ec681f3Smrg LLVMValueRef result; 18017ec681f3Smrg 18027ec681f3Smrg result_type = ac_to_float_type(ctx, LLVMTypeOf(val)); 18037ec681f3Smrg 18047ec681f3Smrg if (result_type == ctx->f16) 18057ec681f3Smrg val = LLVMBuildZExt(ctx->builder, val, ctx->i32, ""); 18067ec681f3Smrg else if (result_type == ctx->v2f16) 18077ec681f3Smrg val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, ""); 18087ec681f3Smrg 18097ec681f3Smrg for (unsigned i = 0; i < 4; ++i) { 18107ec681f3Smrg tl_lanes[i] = i & mask; 18117ec681f3Smrg trbl_lanes[i] = (i & mask) + idx; 18127ec681f3Smrg } 18137ec681f3Smrg 18147ec681f3Smrg tl = ac_build_quad_swizzle(ctx, val, tl_lanes[0], tl_lanes[1], tl_lanes[2], tl_lanes[3]); 18157ec681f3Smrg trbl = 18167ec681f3Smrg ac_build_quad_swizzle(ctx, val, trbl_lanes[0], trbl_lanes[1], trbl_lanes[2], trbl_lanes[3]); 18177ec681f3Smrg 18187ec681f3Smrg if (result_type == ctx->f16) { 18197ec681f3Smrg tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, ""); 18207ec681f3Smrg trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, ""); 18217ec681f3Smrg } 18227ec681f3Smrg 18237ec681f3Smrg tl = LLVMBuildBitCast(ctx->builder, tl, result_type, ""); 18247ec681f3Smrg trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, ""); 18257ec681f3Smrg result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); 18267ec681f3Smrg 18277ec681f3Smrg ac_build_type_name_for_intr(result_type, type, sizeof(type)); 18287ec681f3Smrg snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type); 18297ec681f3Smrg 18307ec681f3Smrg return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0); 18317ec681f3Smrg} 18327ec681f3Smrg 18337ec681f3Smrgvoid ac_build_sendmsg(struct ac_llvm_context *ctx, 
uint32_t msg, LLVMValueRef wave_id) 18347ec681f3Smrg{ 18357ec681f3Smrg LLVMValueRef args[2]; 18367ec681f3Smrg args[0] = LLVMConstInt(ctx->i32, msg, false); 18377ec681f3Smrg args[1] = wave_id; 18387ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0); 18397ec681f3Smrg} 18407ec681f3Smrg 18417ec681f3SmrgLLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type) 18427ec681f3Smrg{ 18437ec681f3Smrg LLVMValueRef msb = 18447ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32", dst_type, &arg, 1, AC_FUNC_ATTR_READNONE); 18457ec681f3Smrg 18467ec681f3Smrg /* The HW returns the last bit index from MSB, but NIR/TGSI wants 18477ec681f3Smrg * the index from LSB. Invert it by doing "31 - msb". */ 18487ec681f3Smrg msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), msb, ""); 18497ec681f3Smrg 18507ec681f3Smrg LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true); 18517ec681f3Smrg LLVMValueRef cond = 18527ec681f3Smrg LLVMBuildOr(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, ctx->i32_0, ""), 18537ec681f3Smrg LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, all_ones, ""), ""); 18547ec681f3Smrg 18557ec681f3Smrg return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, ""); 18567ec681f3Smrg} 18577ec681f3Smrg 18587ec681f3SmrgLLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type) 18597ec681f3Smrg{ 18607ec681f3Smrg const char *intrin_name; 18617ec681f3Smrg LLVMTypeRef type; 18627ec681f3Smrg LLVMValueRef highest_bit; 18637ec681f3Smrg LLVMValueRef zero; 18647ec681f3Smrg unsigned bitsize; 18657ec681f3Smrg 18667ec681f3Smrg bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg)); 18677ec681f3Smrg switch (bitsize) { 18687ec681f3Smrg case 64: 18697ec681f3Smrg intrin_name = "llvm.ctlz.i64"; 18707ec681f3Smrg type = ctx->i64; 18717ec681f3Smrg highest_bit = LLVMConstInt(ctx->i64, 63, false); 18727ec681f3Smrg zero = ctx->i64_0; 18737ec681f3Smrg break; 18747ec681f3Smrg 
case 32: 18757ec681f3Smrg intrin_name = "llvm.ctlz.i32"; 18767ec681f3Smrg type = ctx->i32; 18777ec681f3Smrg highest_bit = LLVMConstInt(ctx->i32, 31, false); 18787ec681f3Smrg zero = ctx->i32_0; 18797ec681f3Smrg break; 18807ec681f3Smrg case 16: 18817ec681f3Smrg intrin_name = "llvm.ctlz.i16"; 18827ec681f3Smrg type = ctx->i16; 18837ec681f3Smrg highest_bit = LLVMConstInt(ctx->i16, 15, false); 18847ec681f3Smrg zero = ctx->i16_0; 18857ec681f3Smrg break; 18867ec681f3Smrg case 8: 18877ec681f3Smrg intrin_name = "llvm.ctlz.i8"; 18887ec681f3Smrg type = ctx->i8; 18897ec681f3Smrg highest_bit = LLVMConstInt(ctx->i8, 7, false); 18907ec681f3Smrg zero = ctx->i8_0; 18917ec681f3Smrg break; 18927ec681f3Smrg default: 18937ec681f3Smrg unreachable(!"invalid bitsize"); 18947ec681f3Smrg break; 18957ec681f3Smrg } 18967ec681f3Smrg 18977ec681f3Smrg LLVMValueRef params[2] = { 18987ec681f3Smrg arg, 18997ec681f3Smrg ctx->i1true, 19007ec681f3Smrg }; 19017ec681f3Smrg 19027ec681f3Smrg LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE); 19037ec681f3Smrg 19047ec681f3Smrg /* The HW returns the last bit index from MSB, but TGSI/NIR wants 19057ec681f3Smrg * the index from LSB. Invert it by doing "31 - msb". 
*/ 19067ec681f3Smrg msb = LLVMBuildSub(ctx->builder, highest_bit, msb, ""); 19077ec681f3Smrg 19087ec681f3Smrg if (bitsize == 64) { 19097ec681f3Smrg msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, ""); 19107ec681f3Smrg } else if (bitsize < 32) { 19117ec681f3Smrg msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, ""); 19127ec681f3Smrg } 19137ec681f3Smrg 19147ec681f3Smrg /* check for zero */ 19157ec681f3Smrg return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""), 19167ec681f3Smrg LLVMConstInt(ctx->i32, -1, true), msb, ""); 19177ec681f3Smrg} 19187ec681f3Smrg 19197ec681f3SmrgLLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) 19207ec681f3Smrg{ 19217ec681f3Smrg char name[64], type[64]; 19227ec681f3Smrg 19237ec681f3Smrg ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type)); 19247ec681f3Smrg snprintf(name, sizeof(name), "llvm.minnum.%s", type); 19257ec681f3Smrg LLVMValueRef args[2] = {a, b}; 19267ec681f3Smrg return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, AC_FUNC_ATTR_READNONE); 19277ec681f3Smrg} 19287ec681f3Smrg 19297ec681f3SmrgLLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) 19307ec681f3Smrg{ 19317ec681f3Smrg char name[64], type[64]; 19327ec681f3Smrg 19337ec681f3Smrg ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type)); 19347ec681f3Smrg snprintf(name, sizeof(name), "llvm.maxnum.%s", type); 19357ec681f3Smrg LLVMValueRef args[2] = {a, b}; 19367ec681f3Smrg return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, AC_FUNC_ATTR_READNONE); 19377ec681f3Smrg} 19387ec681f3Smrg 19397ec681f3SmrgLLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) 19407ec681f3Smrg{ 19417ec681f3Smrg LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, ""); 19427ec681f3Smrg return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); 19437ec681f3Smrg} 19447ec681f3Smrg 19457ec681f3SmrgLLVMValueRef ac_build_imax(struct 
ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) 19467ec681f3Smrg{ 19477ec681f3Smrg LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, ""); 19487ec681f3Smrg return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); 19497ec681f3Smrg} 19507ec681f3Smrg 19517ec681f3SmrgLLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) 19527ec681f3Smrg{ 19537ec681f3Smrg LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, ""); 19547ec681f3Smrg return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); 19557ec681f3Smrg} 19567ec681f3Smrg 19577ec681f3SmrgLLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) 19587ec681f3Smrg{ 19597ec681f3Smrg LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, ""); 19607ec681f3Smrg return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); 19617ec681f3Smrg} 19627ec681f3Smrg 19637ec681f3SmrgLLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value) 19647ec681f3Smrg{ 19657ec681f3Smrg LLVMTypeRef t = LLVMTypeOf(value); 19667ec681f3Smrg return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)), 19677ec681f3Smrg LLVMConstReal(t, 1.0)); 19687ec681f3Smrg} 19697ec681f3Smrg 19707ec681f3Smrgvoid ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a) 19717ec681f3Smrg{ 19727ec681f3Smrg LLVMValueRef args[9]; 19737ec681f3Smrg 19747ec681f3Smrg args[0] = LLVMConstInt(ctx->i32, a->target, 0); 19757ec681f3Smrg args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0); 19767ec681f3Smrg 19777ec681f3Smrg if (a->compr) { 19787ec681f3Smrg args[2] = LLVMBuildBitCast(ctx->builder, a->out[0], ctx->v2i16, ""); 19797ec681f3Smrg args[3] = LLVMBuildBitCast(ctx->builder, a->out[1], ctx->v2i16, ""); 19807ec681f3Smrg args[4] = LLVMConstInt(ctx->i1, a->done, 0); 19817ec681f3Smrg args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0); 19827ec681f3Smrg 19837ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16", ctx->voidt, args, 6, 0); 
19847ec681f3Smrg } else { 19857ec681f3Smrg args[2] = a->out[0]; 19867ec681f3Smrg args[3] = a->out[1]; 19877ec681f3Smrg args[4] = a->out[2]; 19887ec681f3Smrg args[5] = a->out[3]; 19897ec681f3Smrg args[6] = LLVMConstInt(ctx->i1, a->done, 0); 19907ec681f3Smrg args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0); 19917ec681f3Smrg 19927ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", ctx->voidt, args, 8, 0); 19937ec681f3Smrg } 19947ec681f3Smrg} 19957ec681f3Smrg 19967ec681f3Smrgvoid ac_build_export_null(struct ac_llvm_context *ctx) 19977ec681f3Smrg{ 19987ec681f3Smrg struct ac_export_args args; 19997ec681f3Smrg 20007ec681f3Smrg args.enabled_channels = 0x0; /* enabled channels */ 20017ec681f3Smrg args.valid_mask = 1; /* whether the EXEC mask is valid */ 20027ec681f3Smrg args.done = 1; /* DONE bit */ 20037ec681f3Smrg args.target = V_008DFC_SQ_EXP_NULL; 20047ec681f3Smrg args.compr = 0; /* COMPR flag (0 = 32-bit export) */ 20057ec681f3Smrg args.out[0] = LLVMGetUndef(ctx->f32); /* R */ 20067ec681f3Smrg args.out[1] = LLVMGetUndef(ctx->f32); /* G */ 20077ec681f3Smrg args.out[2] = LLVMGetUndef(ctx->f32); /* B */ 20087ec681f3Smrg args.out[3] = LLVMGetUndef(ctx->f32); /* A */ 20097ec681f3Smrg 20107ec681f3Smrg ac_build_export(ctx, &args); 20117ec681f3Smrg} 20127ec681f3Smrg 20137ec681f3Smrgstatic unsigned ac_num_coords(enum ac_image_dim dim) 20147ec681f3Smrg{ 20157ec681f3Smrg switch (dim) { 20167ec681f3Smrg case ac_image_1d: 20177ec681f3Smrg return 1; 20187ec681f3Smrg case ac_image_2d: 20197ec681f3Smrg case ac_image_1darray: 20207ec681f3Smrg return 2; 20217ec681f3Smrg case ac_image_3d: 20227ec681f3Smrg case ac_image_cube: 20237ec681f3Smrg case ac_image_2darray: 20247ec681f3Smrg case ac_image_2dmsaa: 20257ec681f3Smrg return 3; 20267ec681f3Smrg case ac_image_2darraymsaa: 20277ec681f3Smrg return 4; 20287ec681f3Smrg default: 20297ec681f3Smrg unreachable("ac_num_coords: bad dim"); 20307ec681f3Smrg } 20317ec681f3Smrg} 20327ec681f3Smrg 20337ec681f3Smrgstatic unsigned 
ac_num_derivs(enum ac_image_dim dim) 20347ec681f3Smrg{ 20357ec681f3Smrg switch (dim) { 20367ec681f3Smrg case ac_image_1d: 20377ec681f3Smrg case ac_image_1darray: 20387ec681f3Smrg return 2; 20397ec681f3Smrg case ac_image_2d: 20407ec681f3Smrg case ac_image_2darray: 20417ec681f3Smrg case ac_image_cube: 20427ec681f3Smrg return 4; 20437ec681f3Smrg case ac_image_3d: 20447ec681f3Smrg return 6; 20457ec681f3Smrg case ac_image_2dmsaa: 20467ec681f3Smrg case ac_image_2darraymsaa: 20477ec681f3Smrg default: 20487ec681f3Smrg unreachable("derivatives not supported"); 20497ec681f3Smrg } 20507ec681f3Smrg} 20517ec681f3Smrg 20527ec681f3Smrgstatic const char *get_atomic_name(enum ac_atomic_op op) 20537ec681f3Smrg{ 20547ec681f3Smrg switch (op) { 20557ec681f3Smrg case ac_atomic_swap: 20567ec681f3Smrg return "swap"; 20577ec681f3Smrg case ac_atomic_add: 20587ec681f3Smrg return "add"; 20597ec681f3Smrg case ac_atomic_sub: 20607ec681f3Smrg return "sub"; 20617ec681f3Smrg case ac_atomic_smin: 20627ec681f3Smrg return "smin"; 20637ec681f3Smrg case ac_atomic_umin: 20647ec681f3Smrg return "umin"; 20657ec681f3Smrg case ac_atomic_smax: 20667ec681f3Smrg return "smax"; 20677ec681f3Smrg case ac_atomic_umax: 20687ec681f3Smrg return "umax"; 20697ec681f3Smrg case ac_atomic_and: 20707ec681f3Smrg return "and"; 20717ec681f3Smrg case ac_atomic_or: 20727ec681f3Smrg return "or"; 20737ec681f3Smrg case ac_atomic_xor: 20747ec681f3Smrg return "xor"; 20757ec681f3Smrg case ac_atomic_inc_wrap: 20767ec681f3Smrg return "inc"; 20777ec681f3Smrg case ac_atomic_dec_wrap: 20787ec681f3Smrg return "dec"; 20797ec681f3Smrg case ac_atomic_fmin: 20807ec681f3Smrg return "fmin"; 20817ec681f3Smrg case ac_atomic_fmax: 20827ec681f3Smrg return "fmax"; 20837ec681f3Smrg } 20847ec681f3Smrg unreachable("bad atomic op"); 20857ec681f3Smrg} 20867ec681f3Smrg 20877ec681f3SmrgLLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a) 20887ec681f3Smrg{ 20897ec681f3Smrg const char *overload[3] = {"", "", ""}; 
20907ec681f3Smrg unsigned num_overloads = 0; 20917ec681f3Smrg LLVMValueRef args[18]; 20927ec681f3Smrg unsigned num_args = 0; 20937ec681f3Smrg enum ac_image_dim dim = a->dim; 20947ec681f3Smrg 20957ec681f3Smrg assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 || !a->level_zero); 20967ec681f3Smrg assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip && 20977ec681f3Smrg a->opcode != ac_image_store_mip) || 20987ec681f3Smrg a->lod); 20997ec681f3Smrg assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || 21007ec681f3Smrg (!a->compare && !a->offset)); 21017ec681f3Smrg assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || 21027ec681f3Smrg a->opcode == ac_image_get_lod) || 21037ec681f3Smrg !a->bias); 21047ec681f3Smrg assert((a->bias ? 1 : 0) + (a->lod ? 1 : 0) + (a->level_zero ? 1 : 0) + (a->derivs[0] ? 1 : 0) <= 21057ec681f3Smrg 1); 21067ec681f3Smrg assert((a->min_lod ? 1 : 0) + (a->lod ? 1 : 0) + (a->level_zero ? 1 : 0) <= 1); 21077ec681f3Smrg assert(!a->d16 || (ctx->chip_class >= GFX8 && a->opcode != ac_image_atomic && 21087ec681f3Smrg a->opcode != ac_image_atomic_cmpswap && a->opcode != ac_image_get_lod && 21097ec681f3Smrg a->opcode != ac_image_get_resinfo)); 21107ec681f3Smrg assert(!a->a16 || ctx->chip_class >= GFX9); 21117ec681f3Smrg assert(a->g16 == a->a16 || ctx->chip_class >= GFX10); 21127ec681f3Smrg 21137ec681f3Smrg assert(!a->offset || 21147ec681f3Smrg ac_get_elem_bits(ctx, LLVMTypeOf(a->offset)) == 32); 21157ec681f3Smrg assert(!a->bias || 21167ec681f3Smrg ac_get_elem_bits(ctx, LLVMTypeOf(a->bias)) == 32); 21177ec681f3Smrg assert(!a->compare || 21187ec681f3Smrg ac_get_elem_bits(ctx, LLVMTypeOf(a->compare)) == 32); 21197ec681f3Smrg assert(!a->derivs[0] || 21207ec681f3Smrg ((!a->g16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->derivs[0])) == 16) && 21217ec681f3Smrg (a->g16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->derivs[0])) == 32))); 21227ec681f3Smrg assert(!a->coords[0] || 21237ec681f3Smrg ((!a->a16 || 
ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])) == 16) && 21247ec681f3Smrg (a->a16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])) == 32))); 21257ec681f3Smrg assert(!a->lod || 21267ec681f3Smrg ((a->opcode != ac_image_get_resinfo || ac_get_elem_bits(ctx, LLVMTypeOf(a->lod))) && 21277ec681f3Smrg (a->opcode == ac_image_get_resinfo || 21287ec681f3Smrg ac_get_elem_bits(ctx, LLVMTypeOf(a->lod)) == 21297ec681f3Smrg ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0]))))); 21307ec681f3Smrg assert(!a->min_lod || 21317ec681f3Smrg ac_get_elem_bits(ctx, LLVMTypeOf(a->min_lod)) == 21327ec681f3Smrg ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0]))); 21337ec681f3Smrg 21347ec681f3Smrg if (a->opcode == ac_image_get_lod) { 21357ec681f3Smrg switch (dim) { 21367ec681f3Smrg case ac_image_1darray: 21377ec681f3Smrg dim = ac_image_1d; 21387ec681f3Smrg break; 21397ec681f3Smrg case ac_image_2darray: 21407ec681f3Smrg case ac_image_cube: 21417ec681f3Smrg dim = ac_image_2d; 21427ec681f3Smrg break; 21437ec681f3Smrg default: 21447ec681f3Smrg break; 21457ec681f3Smrg } 21467ec681f3Smrg } 21477ec681f3Smrg 21487ec681f3Smrg bool sample = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || 21497ec681f3Smrg a->opcode == ac_image_get_lod; 21507ec681f3Smrg bool atomic = a->opcode == ac_image_atomic || a->opcode == ac_image_atomic_cmpswap; 21517ec681f3Smrg bool load = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || 21527ec681f3Smrg a->opcode == ac_image_load || a->opcode == ac_image_load_mip; 21537ec681f3Smrg LLVMTypeRef coord_type = sample ? (a->a16 ? ctx->f16 : ctx->f32) : (a->a16 ? 
ctx->i16 : ctx->i32); 21547ec681f3Smrg uint8_t dmask = a->dmask; 21557ec681f3Smrg LLVMTypeRef data_type; 21567ec681f3Smrg char data_type_str[32]; 21577ec681f3Smrg 21587ec681f3Smrg if (atomic) { 21597ec681f3Smrg data_type = LLVMTypeOf(a->data[0]); 21607ec681f3Smrg } else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip) { 21617ec681f3Smrg /* Image stores might have been shrinked using the format. */ 21627ec681f3Smrg data_type = LLVMTypeOf(a->data[0]); 21637ec681f3Smrg dmask = (1 << ac_get_llvm_num_components(a->data[0])) - 1; 21647ec681f3Smrg } else { 21657ec681f3Smrg data_type = a->d16 ? ctx->v4f16 : ctx->v4f32; 21667ec681f3Smrg } 21677ec681f3Smrg 21687ec681f3Smrg if (a->tfe) { 21697ec681f3Smrg data_type = LLVMStructTypeInContext( 21707ec681f3Smrg ctx->context, (LLVMTypeRef[]){data_type, ctx->i32}, 2, false); 21717ec681f3Smrg } 21727ec681f3Smrg 21737ec681f3Smrg if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) { 21747ec681f3Smrg args[num_args++] = a->data[0]; 21757ec681f3Smrg if (a->opcode == ac_image_atomic_cmpswap) 21767ec681f3Smrg args[num_args++] = a->data[1]; 21777ec681f3Smrg } 21787ec681f3Smrg 21797ec681f3Smrg if (!atomic) 21807ec681f3Smrg args[num_args++] = LLVMConstInt(ctx->i32, dmask, false); 21817ec681f3Smrg 21827ec681f3Smrg if (a->offset) 21837ec681f3Smrg args[num_args++] = ac_to_integer(ctx, a->offset); 21847ec681f3Smrg if (a->bias) { 21857ec681f3Smrg args[num_args++] = ac_to_float(ctx, a->bias); 21867ec681f3Smrg overload[num_overloads++] = ".f32"; 21877ec681f3Smrg } 21887ec681f3Smrg if (a->compare) 21897ec681f3Smrg args[num_args++] = ac_to_float(ctx, a->compare); 21907ec681f3Smrg if (a->derivs[0]) { 21917ec681f3Smrg unsigned count = ac_num_derivs(dim); 21927ec681f3Smrg for (unsigned i = 0; i < count; ++i) 21937ec681f3Smrg args[num_args++] = ac_to_float(ctx, a->derivs[i]); 21947ec681f3Smrg overload[num_overloads++] = a->g16 ? 
".f16" : ".f32"; 21957ec681f3Smrg } 21967ec681f3Smrg unsigned num_coords = a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0; 21977ec681f3Smrg for (unsigned i = 0; i < num_coords; ++i) 21987ec681f3Smrg args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, ""); 21997ec681f3Smrg if (a->lod) 22007ec681f3Smrg args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, ""); 22017ec681f3Smrg if (a->min_lod) 22027ec681f3Smrg args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, ""); 22037ec681f3Smrg 22047ec681f3Smrg overload[num_overloads++] = sample ? (a->a16 ? ".f16" : ".f32") : (a->a16 ? ".i16" : ".i32"); 22057ec681f3Smrg 22067ec681f3Smrg args[num_args++] = a->resource; 22077ec681f3Smrg if (sample) { 22087ec681f3Smrg args[num_args++] = a->sampler; 22097ec681f3Smrg args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false); 22107ec681f3Smrg } 22117ec681f3Smrg 22127ec681f3Smrg args[num_args++] = a->tfe ? ctx->i32_1 : ctx->i32_0; /* texfailctrl */ 22137ec681f3Smrg args[num_args++] = LLVMConstInt( 22147ec681f3Smrg ctx->i32, load ? 
get_load_cache_policy(ctx, a->cache_policy) : a->cache_policy, false); 22157ec681f3Smrg 22167ec681f3Smrg const char *name; 22177ec681f3Smrg const char *atomic_subop = ""; 22187ec681f3Smrg switch (a->opcode) { 22197ec681f3Smrg case ac_image_sample: 22207ec681f3Smrg name = "sample"; 22217ec681f3Smrg break; 22227ec681f3Smrg case ac_image_gather4: 22237ec681f3Smrg name = "gather4"; 22247ec681f3Smrg break; 22257ec681f3Smrg case ac_image_load: 22267ec681f3Smrg name = "load"; 22277ec681f3Smrg break; 22287ec681f3Smrg case ac_image_load_mip: 22297ec681f3Smrg name = "load.mip"; 22307ec681f3Smrg break; 22317ec681f3Smrg case ac_image_store: 22327ec681f3Smrg name = "store"; 22337ec681f3Smrg break; 22347ec681f3Smrg case ac_image_store_mip: 22357ec681f3Smrg name = "store.mip"; 22367ec681f3Smrg break; 22377ec681f3Smrg case ac_image_atomic: 22387ec681f3Smrg name = "atomic."; 22397ec681f3Smrg atomic_subop = get_atomic_name(a->atomic); 22407ec681f3Smrg break; 22417ec681f3Smrg case ac_image_atomic_cmpswap: 22427ec681f3Smrg name = "atomic."; 22437ec681f3Smrg atomic_subop = "cmpswap"; 22447ec681f3Smrg break; 22457ec681f3Smrg case ac_image_get_lod: 22467ec681f3Smrg name = "getlod"; 22477ec681f3Smrg break; 22487ec681f3Smrg case ac_image_get_resinfo: 22497ec681f3Smrg name = "getresinfo"; 22507ec681f3Smrg break; 22517ec681f3Smrg default: 22527ec681f3Smrg unreachable("invalid image opcode"); 22537ec681f3Smrg } 22547ec681f3Smrg 22557ec681f3Smrg const char *dimname; 22567ec681f3Smrg switch (dim) { 22577ec681f3Smrg case ac_image_1d: 22587ec681f3Smrg dimname = "1d"; 22597ec681f3Smrg break; 22607ec681f3Smrg case ac_image_2d: 22617ec681f3Smrg dimname = "2d"; 22627ec681f3Smrg break; 22637ec681f3Smrg case ac_image_3d: 22647ec681f3Smrg dimname = "3d"; 22657ec681f3Smrg break; 22667ec681f3Smrg case ac_image_cube: 22677ec681f3Smrg dimname = "cube"; 22687ec681f3Smrg break; 22697ec681f3Smrg case ac_image_1darray: 22707ec681f3Smrg dimname = "1darray"; 22717ec681f3Smrg break; 22727ec681f3Smrg case 
ac_image_2darray: 22737ec681f3Smrg dimname = "2darray"; 22747ec681f3Smrg break; 22757ec681f3Smrg case ac_image_2dmsaa: 22767ec681f3Smrg dimname = "2dmsaa"; 22777ec681f3Smrg break; 22787ec681f3Smrg case ac_image_2darraymsaa: 22797ec681f3Smrg dimname = "2darraymsaa"; 22807ec681f3Smrg break; 22817ec681f3Smrg default: 22827ec681f3Smrg unreachable("invalid dim"); 22837ec681f3Smrg } 22847ec681f3Smrg 22857ec681f3Smrg ac_build_type_name_for_intr(data_type, data_type_str, sizeof(data_type_str)); 22867ec681f3Smrg 22877ec681f3Smrg bool lod_suffix = a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4); 22887ec681f3Smrg char intr_name[96]; 22897ec681f3Smrg snprintf(intr_name, sizeof(intr_name), 22907ec681f3Smrg "llvm.amdgcn.image.%s%s" /* base name */ 22917ec681f3Smrg "%s%s%s%s" /* sample/gather modifiers */ 22927ec681f3Smrg ".%s.%s%s%s%s", /* dimension and type overloads */ 22937ec681f3Smrg name, atomic_subop, a->compare ? ".c" : "", 22947ec681f3Smrg a->bias ? ".b" : lod_suffix ? ".l" : a->derivs[0] ? ".d" : a->level_zero ? ".lz" : "", 22957ec681f3Smrg a->min_lod ? ".cl" : "", a->offset ? 
".o" : "", dimname, 22967ec681f3Smrg data_type_str, overload[0], overload[1], overload[2]); 22977ec681f3Smrg 22987ec681f3Smrg LLVMTypeRef retty; 22997ec681f3Smrg if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip) 23007ec681f3Smrg retty = ctx->voidt; 23017ec681f3Smrg else 23027ec681f3Smrg retty = data_type; 23037ec681f3Smrg 23047ec681f3Smrg LLVMValueRef result = ac_build_intrinsic(ctx, intr_name, retty, args, num_args, a->attributes); 23057ec681f3Smrg if (a->tfe) { 23067ec681f3Smrg LLVMValueRef texel = LLVMBuildExtractValue(ctx->builder, result, 0, ""); 23077ec681f3Smrg LLVMValueRef code = LLVMBuildExtractValue(ctx->builder, result, 1, ""); 23087ec681f3Smrg result = ac_build_concat(ctx, texel, ac_to_float(ctx, code)); 23097ec681f3Smrg } 23107ec681f3Smrg 23117ec681f3Smrg if (!sample && !atomic && retty != ctx->voidt) 23127ec681f3Smrg result = ac_to_integer(ctx, result); 23137ec681f3Smrg 23147ec681f3Smrg return result; 23157ec681f3Smrg} 23167ec681f3Smrg 23177ec681f3SmrgLLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx, LLVMValueRef rsrc) 23187ec681f3Smrg{ 23197ec681f3Smrg LLVMValueRef samples; 23207ec681f3Smrg 23217ec681f3Smrg /* Read the samples from the descriptor directly. 23227ec681f3Smrg * Hardware doesn't have any instruction for this. 
23237ec681f3Smrg */ 23247ec681f3Smrg samples = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 3, 0), ""); 23257ec681f3Smrg samples = LLVMBuildLShr(ctx->builder, samples, LLVMConstInt(ctx->i32, 16, 0), ""); 23267ec681f3Smrg samples = LLVMBuildAnd(ctx->builder, samples, LLVMConstInt(ctx->i32, 0xf, 0), ""); 23277ec681f3Smrg samples = LLVMBuildShl(ctx->builder, ctx->i32_1, samples, ""); 23287ec681f3Smrg return samples; 23297ec681f3Smrg} 23307ec681f3Smrg 23317ec681f3SmrgLLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) 23327ec681f3Smrg{ 23337ec681f3Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", ctx->v2f16, args, 2, 23347ec681f3Smrg AC_FUNC_ATTR_READNONE); 23357ec681f3Smrg} 23367ec681f3Smrg 23377ec681f3SmrgLLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) 23387ec681f3Smrg{ 23397ec681f3Smrg LLVMValueRef res = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", ctx->v2i16, args, 2, 23407ec681f3Smrg AC_FUNC_ATTR_READNONE); 23417ec681f3Smrg return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); 23427ec681f3Smrg} 23437ec681f3Smrg 23447ec681f3SmrgLLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) 23457ec681f3Smrg{ 23467ec681f3Smrg LLVMValueRef res = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", ctx->v2i16, args, 2, 23477ec681f3Smrg AC_FUNC_ATTR_READNONE); 23487ec681f3Smrg return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); 23497ec681f3Smrg} 23507ec681f3Smrg 23517ec681f3SmrgLLVMValueRef ac_build_cvt_pknorm_i16_f16(struct ac_llvm_context *ctx, 23527ec681f3Smrg LLVMValueRef args[2]) 23537ec681f3Smrg{ 23547ec681f3Smrg LLVMTypeRef param_types[] = {ctx->f16, ctx->f16}; 23557ec681f3Smrg LLVMTypeRef calltype = LLVMFunctionType(ctx->i32, param_types, 2, false); 23567ec681f3Smrg LLVMValueRef code = LLVMConstInlineAsm(calltype, 23577ec681f3Smrg "v_cvt_pknorm_i16_f16 $0, $1, $2", "=v,v,v", 23587ec681f3Smrg false, false); 
23597ec681f3Smrg return LLVMBuildCall(ctx->builder, code, args, 2, ""); 23607ec681f3Smrg} 23617ec681f3Smrg 23627ec681f3SmrgLLVMValueRef ac_build_cvt_pknorm_u16_f16(struct ac_llvm_context *ctx, 23637ec681f3Smrg LLVMValueRef args[2]) 23647ec681f3Smrg{ 23657ec681f3Smrg LLVMTypeRef param_types[] = {ctx->f16, ctx->f16}; 23667ec681f3Smrg LLVMTypeRef calltype = LLVMFunctionType(ctx->i32, param_types, 2, false); 23677ec681f3Smrg LLVMValueRef code = LLVMConstInlineAsm(calltype, 23687ec681f3Smrg "v_cvt_pknorm_u16_f16 $0, $1, $2", "=v,v,v", 23697ec681f3Smrg false, false); 23707ec681f3Smrg return LLVMBuildCall(ctx->builder, code, args, 2, ""); 23717ec681f3Smrg} 23727ec681f3Smrg 23737ec681f3Smrg/* The 8-bit and 10-bit clamping is for HW workarounds. */ 23747ec681f3SmrgLLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, 23757ec681f3Smrg bool hi) 23767ec681f3Smrg{ 23777ec681f3Smrg assert(bits == 8 || bits == 10 || bits == 16); 23787ec681f3Smrg 23797ec681f3Smrg LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0); 23807ec681f3Smrg LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0); 23817ec681f3Smrg LLVMValueRef max_alpha = bits != 10 ? max_rgb : ctx->i32_1; 23827ec681f3Smrg LLVMValueRef min_alpha = bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0); 23837ec681f3Smrg 23847ec681f3Smrg /* Clamp. */ 23857ec681f3Smrg if (bits != 16) { 23867ec681f3Smrg for (int i = 0; i < 2; i++) { 23877ec681f3Smrg bool alpha = hi && i == 1; 23887ec681f3Smrg args[i] = ac_build_imin(ctx, args[i], alpha ? max_alpha : max_rgb); 23897ec681f3Smrg args[i] = ac_build_imax(ctx, args[i], alpha ? 
min_alpha : min_rgb); 23907ec681f3Smrg } 23917ec681f3Smrg } 23927ec681f3Smrg 23937ec681f3Smrg LLVMValueRef res = 23947ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", ctx->v2i16, args, 2, AC_FUNC_ATTR_READNONE); 23957ec681f3Smrg return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); 23967ec681f3Smrg} 23977ec681f3Smrg 23987ec681f3Smrg/* The 8-bit and 10-bit clamping is for HW workarounds. */ 23997ec681f3SmrgLLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, 24007ec681f3Smrg bool hi) 24017ec681f3Smrg{ 24027ec681f3Smrg assert(bits == 8 || bits == 10 || bits == 16); 24037ec681f3Smrg 24047ec681f3Smrg LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0); 24057ec681f3Smrg LLVMValueRef max_alpha = bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0); 24067ec681f3Smrg 24077ec681f3Smrg /* Clamp. */ 24087ec681f3Smrg if (bits != 16) { 24097ec681f3Smrg for (int i = 0; i < 2; i++) { 24107ec681f3Smrg bool alpha = hi && i == 1; 24117ec681f3Smrg args[i] = ac_build_umin(ctx, args[i], alpha ? 
max_alpha : max_rgb); 24127ec681f3Smrg } 24137ec681f3Smrg } 24147ec681f3Smrg 24157ec681f3Smrg LLVMValueRef res = 24167ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", ctx->v2i16, args, 2, AC_FUNC_ATTR_READNONE); 24177ec681f3Smrg return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); 24187ec681f3Smrg} 24197ec681f3Smrg 24207ec681f3SmrgLLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1) 24217ec681f3Smrg{ 24227ec681f3Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, &i1, 1, AC_FUNC_ATTR_READNONE); 24237ec681f3Smrg} 24247ec681f3Smrg 24257ec681f3Smrgvoid ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1) 24267ec681f3Smrg{ 24277ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, &i1, 1, 0); 24287ec681f3Smrg} 24297ec681f3Smrg 24307ec681f3SmrgLLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset, 24317ec681f3Smrg LLVMValueRef width, bool is_signed) 24327ec681f3Smrg{ 24337ec681f3Smrg LLVMValueRef args[] = { 24347ec681f3Smrg input, 24357ec681f3Smrg offset, 24367ec681f3Smrg width, 24377ec681f3Smrg }; 24387ec681f3Smrg 24397ec681f3Smrg return ac_build_intrinsic(ctx, is_signed ? "llvm.amdgcn.sbfe.i32" : "llvm.amdgcn.ubfe.i32", 24407ec681f3Smrg ctx->i32, args, 3, AC_FUNC_ATTR_READNONE); 24417ec681f3Smrg} 24427ec681f3Smrg 24437ec681f3SmrgLLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1, 24447ec681f3Smrg LLVMValueRef s2) 24457ec681f3Smrg{ 24467ec681f3Smrg return LLVMBuildAdd(ctx->builder, LLVMBuildMul(ctx->builder, s0, s1, ""), s2, ""); 24477ec681f3Smrg} 24487ec681f3Smrg 24497ec681f3SmrgLLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1, 24507ec681f3Smrg LLVMValueRef s2) 24517ec681f3Smrg{ 24527ec681f3Smrg /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. 
*/ 24537ec681f3Smrg if (ctx->chip_class >= GFX10) { 24547ec681f3Smrg return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32, (LLVMValueRef[]){s0, s1, s2}, 3, 24557ec681f3Smrg AC_FUNC_ATTR_READNONE); 24567ec681f3Smrg } 24577ec681f3Smrg 24587ec681f3Smrg return LLVMBuildFAdd(ctx->builder, LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, ""); 24597ec681f3Smrg} 24607ec681f3Smrg 24617ec681f3Smrgvoid ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags) 24627ec681f3Smrg{ 24637ec681f3Smrg if (!wait_flags) 24647ec681f3Smrg return; 24657ec681f3Smrg 24667ec681f3Smrg unsigned lgkmcnt = 63; 24677ec681f3Smrg unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15; 24687ec681f3Smrg unsigned vscnt = 63; 24697ec681f3Smrg 24707ec681f3Smrg if (wait_flags & AC_WAIT_LGKM) 24717ec681f3Smrg lgkmcnt = 0; 24727ec681f3Smrg if (wait_flags & AC_WAIT_VLOAD) 24737ec681f3Smrg vmcnt = 0; 24747ec681f3Smrg 24757ec681f3Smrg if (wait_flags & AC_WAIT_VSTORE) { 24767ec681f3Smrg if (ctx->chip_class >= GFX10) 24777ec681f3Smrg vscnt = 0; 24787ec681f3Smrg else 24797ec681f3Smrg vmcnt = 0; 24807ec681f3Smrg } 24817ec681f3Smrg 24827ec681f3Smrg /* There is no intrinsic for vscnt(0), so use a fence. 
*/ 24837ec681f3Smrg if ((wait_flags & AC_WAIT_LGKM && wait_flags & AC_WAIT_VLOAD && wait_flags & AC_WAIT_VSTORE) || 24847ec681f3Smrg vscnt == 0) { 24857ec681f3Smrg LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, ""); 24867ec681f3Smrg return; 24877ec681f3Smrg } 24887ec681f3Smrg 24897ec681f3Smrg unsigned simm16 = (lgkmcnt << 8) | (7 << 4) | /* expcnt */ 24907ec681f3Smrg (vmcnt & 0xf) | ((vmcnt >> 4) << 14); 24917ec681f3Smrg 24927ec681f3Smrg LLVMValueRef args[1] = { 24937ec681f3Smrg LLVMConstInt(ctx->i32, simm16, false), 24947ec681f3Smrg }; 24957ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0); 24967ec681f3Smrg} 24977ec681f3Smrg 24987ec681f3SmrgLLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src, 24997ec681f3Smrg LLVMTypeRef type) 25007ec681f3Smrg{ 25017ec681f3Smrg unsigned bitsize = ac_get_elem_bits(ctx, type); 25027ec681f3Smrg LLVMValueRef zero = LLVMConstReal(type, 0.0); 25037ec681f3Smrg LLVMValueRef one = LLVMConstReal(type, 1.0); 25047ec681f3Smrg LLVMValueRef result; 25057ec681f3Smrg 25067ec681f3Smrg if (bitsize == 64 || (bitsize == 16 && ctx->chip_class <= GFX8) || type == ctx->v2f16) { 25077ec681f3Smrg /* Use fmin/fmax for 64-bit fsat or 16-bit on GFX6-GFX8 because LLVM 25087ec681f3Smrg * doesn't expose an intrinsic. 
25097ec681f3Smrg */ 25107ec681f3Smrg result = ac_build_fmin(ctx, ac_build_fmax(ctx, src, zero), one); 25117ec681f3Smrg } else { 25127ec681f3Smrg LLVMTypeRef type; 25137ec681f3Smrg char *intr; 25147ec681f3Smrg 25157ec681f3Smrg if (bitsize == 16) { 25167ec681f3Smrg intr = "llvm.amdgcn.fmed3.f16"; 25177ec681f3Smrg type = ctx->f16; 25187ec681f3Smrg } else { 25197ec681f3Smrg assert(bitsize == 32); 25207ec681f3Smrg intr = "llvm.amdgcn.fmed3.f32"; 25217ec681f3Smrg type = ctx->f32; 25227ec681f3Smrg } 25237ec681f3Smrg 25247ec681f3Smrg LLVMValueRef params[] = { 25257ec681f3Smrg zero, 25267ec681f3Smrg one, 25277ec681f3Smrg src, 25287ec681f3Smrg }; 25297ec681f3Smrg 25307ec681f3Smrg result = ac_build_intrinsic(ctx, intr, type, params, 3, 25317ec681f3Smrg AC_FUNC_ATTR_READNONE); 25327ec681f3Smrg } 25337ec681f3Smrg 25347ec681f3Smrg if (ctx->chip_class < GFX9 && bitsize == 32) { 25357ec681f3Smrg /* Only pre-GFX9 chips do not flush denorms. */ 25367ec681f3Smrg result = ac_build_canonicalize(ctx, result, bitsize); 25377ec681f3Smrg } 25387ec681f3Smrg 25397ec681f3Smrg return result; 25407ec681f3Smrg} 25417ec681f3Smrg 25427ec681f3SmrgLLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) 25437ec681f3Smrg{ 25447ec681f3Smrg LLVMTypeRef type; 25457ec681f3Smrg char *intr; 25467ec681f3Smrg 25477ec681f3Smrg if (bitsize == 16) { 25487ec681f3Smrg intr = "llvm.amdgcn.fract.f16"; 25497ec681f3Smrg type = ctx->f16; 25507ec681f3Smrg } else if (bitsize == 32) { 25517ec681f3Smrg intr = "llvm.amdgcn.fract.f32"; 25527ec681f3Smrg type = ctx->f32; 25537ec681f3Smrg } else { 25547ec681f3Smrg intr = "llvm.amdgcn.fract.f64"; 25557ec681f3Smrg type = ctx->f64; 25567ec681f3Smrg } 25577ec681f3Smrg 25587ec681f3Smrg LLVMValueRef params[] = { 25597ec681f3Smrg src0, 25607ec681f3Smrg }; 25617ec681f3Smrg return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE); 25627ec681f3Smrg} 25637ec681f3Smrg 25647ec681f3SmrgLLVMValueRef ac_const_uint_vec(struct 
ac_llvm_context *ctx, LLVMTypeRef type, uint64_t value) 25657ec681f3Smrg{ 25667ec681f3Smrg 25677ec681f3Smrg if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { 25687ec681f3Smrg LLVMValueRef scalar = LLVMConstInt(LLVMGetElementType(type), value, 0); 25697ec681f3Smrg unsigned vec_size = LLVMGetVectorSize(type); 25707ec681f3Smrg LLVMValueRef *scalars = alloca(vec_size * sizeof(LLVMValueRef)); 25717ec681f3Smrg 25727ec681f3Smrg for (unsigned i = 0; i < vec_size; i++) 25737ec681f3Smrg scalars[i] = scalar; 25747ec681f3Smrg return LLVMConstVector(scalars, vec_size); 25757ec681f3Smrg } 25767ec681f3Smrg return LLVMConstInt(type, value, 0); 25777ec681f3Smrg} 25787ec681f3Smrg 25797ec681f3SmrgLLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0) 25807ec681f3Smrg{ 25817ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(src0); 25827ec681f3Smrg LLVMValueRef val; 25837ec681f3Smrg 25847ec681f3Smrg /* v_med3 is selected only when max is first. (LLVM bug?) */ 25857ec681f3Smrg val = ac_build_imax(ctx, src0, ac_const_uint_vec(ctx, type, -1)); 25867ec681f3Smrg return ac_build_imin(ctx, val, ac_const_uint_vec(ctx, type, 1)); 25877ec681f3Smrg} 25887ec681f3Smrg 25897ec681f3Smrgstatic LLVMValueRef ac_eliminate_negative_zero(struct ac_llvm_context *ctx, LLVMValueRef val) 25907ec681f3Smrg{ 25917ec681f3Smrg ac_enable_signed_zeros(ctx); 25927ec681f3Smrg /* (val + 0) converts negative zero to positive zero. 
*/ 25937ec681f3Smrg val = LLVMBuildFAdd(ctx->builder, val, LLVMConstNull(LLVMTypeOf(val)), ""); 25947ec681f3Smrg ac_disable_signed_zeros(ctx); 25957ec681f3Smrg return val; 25967ec681f3Smrg} 25977ec681f3Smrg 25987ec681f3SmrgLLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src) 25997ec681f3Smrg{ 26007ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(src); 26017ec681f3Smrg LLVMValueRef pos, neg, dw[2], val; 26027ec681f3Smrg unsigned bitsize = ac_get_elem_bits(ctx, type); 26037ec681f3Smrg 26047ec681f3Smrg /* The standard version leads to this: 26057ec681f3Smrg * v_cmp_ngt_f32_e64 s[0:1], s4, 0 ; D40B0000 00010004 26067ec681f3Smrg * v_cndmask_b32_e64 v4, 1.0, s4, s[0:1] ; D5010004 000008F2 26077ec681f3Smrg * v_cmp_le_f32_e32 vcc, 0, v4 ; 7C060880 26087ec681f3Smrg * v_cndmask_b32_e32 v4, -1.0, v4, vcc ; 020808F3 26097ec681f3Smrg * 26107ec681f3Smrg * The isign version: 26117ec681f3Smrg * v_add_f32_e64 v4, s4, 0 ; D5030004 00010004 26127ec681f3Smrg * v_med3_i32 v4, v4, -1, 1 ; D5580004 02058304 26137ec681f3Smrg * v_cvt_f32_i32_e32 v4, v4 ; 7E080B04 26147ec681f3Smrg * 26157ec681f3Smrg * (src0 + 0) converts negative zero to positive zero. 26167ec681f3Smrg * After that, int(fsign(x)) == isign(floatBitsToInt(x)). 26177ec681f3Smrg * 26187ec681f3Smrg * For FP64, use the standard version, which doesn't suffer from the huge DP rate 26197ec681f3Smrg * reduction. 
(FP64 comparisons are as fast as int64 comparisons) 26207ec681f3Smrg */ 26217ec681f3Smrg if (bitsize == 16 || bitsize == 32) { 26227ec681f3Smrg val = ac_to_integer(ctx, ac_eliminate_negative_zero(ctx, src)); 26237ec681f3Smrg val = ac_build_isign(ctx, val); 26247ec681f3Smrg return LLVMBuildSIToFP(ctx->builder, val, type, ""); 26257ec681f3Smrg } 26267ec681f3Smrg 26277ec681f3Smrg assert(bitsize == 64); 26287ec681f3Smrg pos = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src, ctx->f64_0, ""); 26297ec681f3Smrg neg = LLVMBuildFCmp(ctx->builder, LLVMRealOLT, src, ctx->f64_0, ""); 26307ec681f3Smrg dw[0] = ctx->i32_0; 26317ec681f3Smrg dw[1] = LLVMBuildSelect( 26327ec681f3Smrg ctx->builder, pos, LLVMConstInt(ctx->i32, 0x3FF00000, 0), 26337ec681f3Smrg LLVMBuildSelect(ctx->builder, neg, LLVMConstInt(ctx->i32, 0xBFF00000, 0), ctx->i32_0, ""), 26347ec681f3Smrg ""); 26357ec681f3Smrg return LLVMBuildBitCast(ctx->builder, ac_build_gather_values(ctx, dw, 2), ctx->f64, ""); 26367ec681f3Smrg} 26377ec681f3Smrg 26387ec681f3SmrgLLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0) 26397ec681f3Smrg{ 26407ec681f3Smrg LLVMValueRef result; 26417ec681f3Smrg unsigned bitsize; 26427ec681f3Smrg 26437ec681f3Smrg bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); 26447ec681f3Smrg 26457ec681f3Smrg switch (bitsize) { 26467ec681f3Smrg case 128: 26477ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.ctpop.i128", ctx->i128, (LLVMValueRef[]){src0}, 1, 26487ec681f3Smrg AC_FUNC_ATTR_READNONE); 26497ec681f3Smrg result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); 26507ec681f3Smrg break; 26517ec681f3Smrg case 64: 26527ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64, (LLVMValueRef[]){src0}, 1, 26537ec681f3Smrg AC_FUNC_ATTR_READNONE); 26547ec681f3Smrg 26557ec681f3Smrg result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); 26567ec681f3Smrg break; 26577ec681f3Smrg case 32: 26587ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, 
(LLVMValueRef[]){src0}, 1, 26597ec681f3Smrg AC_FUNC_ATTR_READNONE); 26607ec681f3Smrg break; 26617ec681f3Smrg case 16: 26627ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16, (LLVMValueRef[]){src0}, 1, 26637ec681f3Smrg AC_FUNC_ATTR_READNONE); 26647ec681f3Smrg 26657ec681f3Smrg result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); 26667ec681f3Smrg break; 26677ec681f3Smrg case 8: 26687ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8, (LLVMValueRef[]){src0}, 1, 26697ec681f3Smrg AC_FUNC_ATTR_READNONE); 26707ec681f3Smrg 26717ec681f3Smrg result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); 26727ec681f3Smrg break; 26737ec681f3Smrg default: 26747ec681f3Smrg unreachable(!"invalid bitsize"); 26757ec681f3Smrg break; 26767ec681f3Smrg } 26777ec681f3Smrg 26787ec681f3Smrg return result; 26797ec681f3Smrg} 26807ec681f3Smrg 26817ec681f3SmrgLLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0) 26827ec681f3Smrg{ 26837ec681f3Smrg LLVMValueRef result; 26847ec681f3Smrg unsigned bitsize; 26857ec681f3Smrg 26867ec681f3Smrg bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); 26877ec681f3Smrg 26887ec681f3Smrg switch (bitsize) { 26897ec681f3Smrg case 64: 26907ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64, (LLVMValueRef[]){src0}, 1, 26917ec681f3Smrg AC_FUNC_ATTR_READNONE); 26927ec681f3Smrg 26937ec681f3Smrg result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); 26947ec681f3Smrg break; 26957ec681f3Smrg case 32: 26967ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, (LLVMValueRef[]){src0}, 1, 26977ec681f3Smrg AC_FUNC_ATTR_READNONE); 26987ec681f3Smrg break; 26997ec681f3Smrg case 16: 27007ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16, (LLVMValueRef[]){src0}, 1, 27017ec681f3Smrg AC_FUNC_ATTR_READNONE); 27027ec681f3Smrg 27037ec681f3Smrg result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); 27047ec681f3Smrg break; 
27057ec681f3Smrg case 8: 27067ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8, (LLVMValueRef[]){src0}, 1, 27077ec681f3Smrg AC_FUNC_ATTR_READNONE); 27087ec681f3Smrg 27097ec681f3Smrg result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); 27107ec681f3Smrg break; 27117ec681f3Smrg default: 27127ec681f3Smrg unreachable(!"invalid bitsize"); 27137ec681f3Smrg break; 27147ec681f3Smrg } 27157ec681f3Smrg 27167ec681f3Smrg return result; 27177ec681f3Smrg} 27187ec681f3Smrg 27197ec681f3Smrg#define AC_EXP_TARGET 0 27207ec681f3Smrg#define AC_EXP_ENABLED_CHANNELS 1 27217ec681f3Smrg#define AC_EXP_OUT0 2 27227ec681f3Smrg 27237ec681f3Smrgenum ac_ir_type 27247ec681f3Smrg{ 27257ec681f3Smrg AC_IR_UNDEF, 27267ec681f3Smrg AC_IR_CONST, 27277ec681f3Smrg AC_IR_VALUE, 27287ec681f3Smrg}; 27297ec681f3Smrg 27307ec681f3Smrgstruct ac_vs_exp_chan { 27317ec681f3Smrg LLVMValueRef value; 27327ec681f3Smrg float const_float; 27337ec681f3Smrg enum ac_ir_type type; 27347ec681f3Smrg}; 27357ec681f3Smrg 27367ec681f3Smrgstruct ac_vs_exp_inst { 27377ec681f3Smrg unsigned offset; 27387ec681f3Smrg LLVMValueRef inst; 27397ec681f3Smrg struct ac_vs_exp_chan chan[4]; 27407ec681f3Smrg}; 27417ec681f3Smrg 27427ec681f3Smrgstruct ac_vs_exports { 27437ec681f3Smrg unsigned num; 27447ec681f3Smrg struct ac_vs_exp_inst exp[VARYING_SLOT_MAX]; 27457ec681f3Smrg}; 27467ec681f3Smrg 27477ec681f3Smrg/* Return true if the PARAM export has been eliminated. */ 27487ec681f3Smrgstatic bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, uint32_t num_outputs, 27497ec681f3Smrg struct ac_vs_exp_inst *exp) 27507ec681f3Smrg{ 27517ec681f3Smrg unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ 27527ec681f3Smrg bool is_zero[4] = {0}, is_one[4] = {0}; 27537ec681f3Smrg 27547ec681f3Smrg for (i = 0; i < 4; i++) { 27557ec681f3Smrg /* It's a constant expression. Undef outputs are eliminated too. 
*/ 27567ec681f3Smrg if (exp->chan[i].type == AC_IR_UNDEF) { 27577ec681f3Smrg is_zero[i] = true; 27587ec681f3Smrg is_one[i] = true; 27597ec681f3Smrg } else if (exp->chan[i].type == AC_IR_CONST) { 27607ec681f3Smrg if (exp->chan[i].const_float == 0) 27617ec681f3Smrg is_zero[i] = true; 27627ec681f3Smrg else if (exp->chan[i].const_float == 1) 27637ec681f3Smrg is_one[i] = true; 27647ec681f3Smrg else 27657ec681f3Smrg return false; /* other constant */ 27667ec681f3Smrg } else 27677ec681f3Smrg return false; 27687ec681f3Smrg } 27697ec681f3Smrg 27707ec681f3Smrg /* Only certain combinations of 0 and 1 can be eliminated. */ 27717ec681f3Smrg if (is_zero[0] && is_zero[1] && is_zero[2]) 27727ec681f3Smrg default_val = is_zero[3] ? 0 : 1; 27737ec681f3Smrg else if (is_one[0] && is_one[1] && is_one[2]) 27747ec681f3Smrg default_val = is_zero[3] ? 2 : 3; 27757ec681f3Smrg else 27767ec681f3Smrg return false; 27777ec681f3Smrg 27787ec681f3Smrg /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ 27797ec681f3Smrg LLVMInstructionEraseFromParent(exp->inst); 27807ec681f3Smrg 27817ec681f3Smrg /* Change OFFSET to DEFAULT_VAL. */ 27827ec681f3Smrg for (i = 0; i < num_outputs; i++) { 27837ec681f3Smrg if (vs_output_param_offset[i] == exp->offset) { 27847ec681f3Smrg vs_output_param_offset[i] = AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val; 27857ec681f3Smrg break; 27867ec681f3Smrg } 27877ec681f3Smrg } 27887ec681f3Smrg return true; 27897ec681f3Smrg} 27907ec681f3Smrg 27917ec681f3Smrgstatic bool ac_eliminate_duplicated_output(struct ac_llvm_context *ctx, 27927ec681f3Smrg uint8_t *vs_output_param_offset, uint32_t num_outputs, 27937ec681f3Smrg struct ac_vs_exports *processed, 27947ec681f3Smrg struct ac_vs_exp_inst *exp) 27957ec681f3Smrg{ 27967ec681f3Smrg unsigned p, copy_back_channels = 0; 27977ec681f3Smrg 27987ec681f3Smrg /* See if the output is already in the list of processed outputs. 27997ec681f3Smrg * The LLVMValueRef comparison relies on SSA. 
28007ec681f3Smrg */ 28017ec681f3Smrg for (p = 0; p < processed->num; p++) { 28027ec681f3Smrg bool different = false; 28037ec681f3Smrg 28047ec681f3Smrg for (unsigned j = 0; j < 4; j++) { 28057ec681f3Smrg struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j]; 28067ec681f3Smrg struct ac_vs_exp_chan *c2 = &exp->chan[j]; 28077ec681f3Smrg 28087ec681f3Smrg /* Treat undef as a match. */ 28097ec681f3Smrg if (c2->type == AC_IR_UNDEF) 28107ec681f3Smrg continue; 28117ec681f3Smrg 28127ec681f3Smrg /* If c1 is undef but c2 isn't, we can copy c2 to c1 28137ec681f3Smrg * and consider the instruction duplicated. 28147ec681f3Smrg */ 28157ec681f3Smrg if (c1->type == AC_IR_UNDEF) { 28167ec681f3Smrg copy_back_channels |= 1 << j; 28177ec681f3Smrg continue; 28187ec681f3Smrg } 28197ec681f3Smrg 28207ec681f3Smrg /* Test whether the channels are not equal. */ 28217ec681f3Smrg if (c1->type != c2->type || 28227ec681f3Smrg (c1->type == AC_IR_CONST && c1->const_float != c2->const_float) || 28237ec681f3Smrg (c1->type == AC_IR_VALUE && c1->value != c2->value)) { 28247ec681f3Smrg different = true; 28257ec681f3Smrg break; 28267ec681f3Smrg } 28277ec681f3Smrg } 28287ec681f3Smrg if (!different) 28297ec681f3Smrg break; 28307ec681f3Smrg 28317ec681f3Smrg copy_back_channels = 0; 28327ec681f3Smrg } 28337ec681f3Smrg if (p == processed->num) 28347ec681f3Smrg return false; 28357ec681f3Smrg 28367ec681f3Smrg /* If a match was found, but the matching export has undef where the new 28377ec681f3Smrg * one has a normal value, copy the normal value to the undef channel. 28387ec681f3Smrg */ 28397ec681f3Smrg struct ac_vs_exp_inst *match = &processed->exp[p]; 28407ec681f3Smrg 28417ec681f3Smrg /* Get current enabled channels mask. 
*/ 28427ec681f3Smrg LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS); 28437ec681f3Smrg unsigned enabled_channels = LLVMConstIntGetZExtValue(arg); 28447ec681f3Smrg 28457ec681f3Smrg while (copy_back_channels) { 28467ec681f3Smrg unsigned chan = u_bit_scan(©_back_channels); 28477ec681f3Smrg 28487ec681f3Smrg assert(match->chan[chan].type == AC_IR_UNDEF); 28497ec681f3Smrg LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan, exp->chan[chan].value); 28507ec681f3Smrg match->chan[chan] = exp->chan[chan]; 28517ec681f3Smrg 28527ec681f3Smrg /* Update number of enabled channels because the original mask 28537ec681f3Smrg * is not always 0xf. 28547ec681f3Smrg */ 28557ec681f3Smrg enabled_channels |= (1 << chan); 28567ec681f3Smrg LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS, 28577ec681f3Smrg LLVMConstInt(ctx->i32, enabled_channels, 0)); 28587ec681f3Smrg } 28597ec681f3Smrg 28607ec681f3Smrg /* The PARAM export is duplicated. Kill it. */ 28617ec681f3Smrg LLVMInstructionEraseFromParent(exp->inst); 28627ec681f3Smrg 28637ec681f3Smrg /* Change OFFSET to the matching export. */ 28647ec681f3Smrg for (unsigned i = 0; i < num_outputs; i++) { 28657ec681f3Smrg if (vs_output_param_offset[i] == exp->offset) { 28667ec681f3Smrg vs_output_param_offset[i] = match->offset; 28677ec681f3Smrg break; 28687ec681f3Smrg } 28697ec681f3Smrg } 28707ec681f3Smrg return true; 28717ec681f3Smrg} 28727ec681f3Smrg 28737ec681f3Smrgvoid ac_optimize_vs_outputs(struct ac_llvm_context *ctx, LLVMValueRef main_fn, 28747ec681f3Smrg uint8_t *vs_output_param_offset, uint32_t num_outputs, 28757ec681f3Smrg uint32_t skip_output_mask, uint8_t *num_param_exports) 28767ec681f3Smrg{ 28777ec681f3Smrg LLVMBasicBlockRef bb; 28787ec681f3Smrg bool removed_any = false; 28797ec681f3Smrg struct ac_vs_exports exports; 28807ec681f3Smrg 28817ec681f3Smrg exports.num = 0; 28827ec681f3Smrg 28837ec681f3Smrg /* Process all LLVM instructions. 
*/ 28847ec681f3Smrg bb = LLVMGetFirstBasicBlock(main_fn); 28857ec681f3Smrg while (bb) { 28867ec681f3Smrg LLVMValueRef inst = LLVMGetFirstInstruction(bb); 28877ec681f3Smrg 28887ec681f3Smrg while (inst) { 28897ec681f3Smrg LLVMValueRef cur = inst; 28907ec681f3Smrg inst = LLVMGetNextInstruction(inst); 28917ec681f3Smrg struct ac_vs_exp_inst exp; 28927ec681f3Smrg 28937ec681f3Smrg if (LLVMGetInstructionOpcode(cur) != LLVMCall) 28947ec681f3Smrg continue; 28957ec681f3Smrg 28967ec681f3Smrg LLVMValueRef callee = ac_llvm_get_called_value(cur); 28977ec681f3Smrg 28987ec681f3Smrg if (!ac_llvm_is_function(callee)) 28997ec681f3Smrg continue; 29007ec681f3Smrg 29017ec681f3Smrg const char *name = LLVMGetValueName(callee); 29027ec681f3Smrg unsigned num_args = LLVMCountParams(callee); 29037ec681f3Smrg 29047ec681f3Smrg /* Check if this is an export instruction. */ 29057ec681f3Smrg if ((num_args != 9 && num_args != 8) || 29067ec681f3Smrg (strcmp(name, "llvm.SI.export") && strcmp(name, "llvm.amdgcn.exp.f32"))) 29077ec681f3Smrg continue; 29087ec681f3Smrg 29097ec681f3Smrg LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET); 29107ec681f3Smrg unsigned target = LLVMConstIntGetZExtValue(arg); 29117ec681f3Smrg 29127ec681f3Smrg if (target < V_008DFC_SQ_EXP_PARAM) 29137ec681f3Smrg continue; 29147ec681f3Smrg 29157ec681f3Smrg target -= V_008DFC_SQ_EXP_PARAM; 29167ec681f3Smrg 29177ec681f3Smrg /* Parse the instruction. 
*/ 29187ec681f3Smrg memset(&exp, 0, sizeof(exp)); 29197ec681f3Smrg exp.offset = target; 29207ec681f3Smrg exp.inst = cur; 29217ec681f3Smrg 29227ec681f3Smrg for (unsigned i = 0; i < 4; i++) { 29237ec681f3Smrg LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i); 29247ec681f3Smrg 29257ec681f3Smrg exp.chan[i].value = v; 29267ec681f3Smrg 29277ec681f3Smrg if (LLVMIsUndef(v)) { 29287ec681f3Smrg exp.chan[i].type = AC_IR_UNDEF; 29297ec681f3Smrg } else if (LLVMIsAConstantFP(v)) { 29307ec681f3Smrg LLVMBool loses_info; 29317ec681f3Smrg exp.chan[i].type = AC_IR_CONST; 29327ec681f3Smrg exp.chan[i].const_float = LLVMConstRealGetDouble(v, &loses_info); 29337ec681f3Smrg } else { 29347ec681f3Smrg exp.chan[i].type = AC_IR_VALUE; 29357ec681f3Smrg } 29367ec681f3Smrg } 29377ec681f3Smrg 29387ec681f3Smrg /* Eliminate constant and duplicated PARAM exports. */ 29397ec681f3Smrg if (!((1u << target) & skip_output_mask) && 29407ec681f3Smrg (ac_eliminate_const_output(vs_output_param_offset, num_outputs, &exp) || 29417ec681f3Smrg ac_eliminate_duplicated_output(ctx, vs_output_param_offset, num_outputs, &exports, 29427ec681f3Smrg &exp))) { 29437ec681f3Smrg removed_any = true; 29447ec681f3Smrg } else { 29457ec681f3Smrg exports.exp[exports.num++] = exp; 29467ec681f3Smrg } 29477ec681f3Smrg } 29487ec681f3Smrg bb = LLVMGetNextBasicBlock(bb); 29497ec681f3Smrg } 29507ec681f3Smrg 29517ec681f3Smrg /* Remove holes in export memory due to removed PARAM exports. 29527ec681f3Smrg * This is done by renumbering all PARAM exports. 29537ec681f3Smrg */ 29547ec681f3Smrg if (removed_any) { 29557ec681f3Smrg uint8_t old_offset[VARYING_SLOT_MAX]; 29567ec681f3Smrg unsigned out, i; 29577ec681f3Smrg 29587ec681f3Smrg /* Make a copy of the offsets. We need the old version while 29597ec681f3Smrg * we are modifying some of them. 
*/ 29607ec681f3Smrg memcpy(old_offset, vs_output_param_offset, sizeof(old_offset)); 29617ec681f3Smrg 29627ec681f3Smrg for (i = 0; i < exports.num; i++) { 29637ec681f3Smrg unsigned offset = exports.exp[i].offset; 29647ec681f3Smrg 29657ec681f3Smrg /* Update vs_output_param_offset. Multiple outputs can 29667ec681f3Smrg * have the same offset. 29677ec681f3Smrg */ 29687ec681f3Smrg for (out = 0; out < num_outputs; out++) { 29697ec681f3Smrg if (old_offset[out] == offset) 29707ec681f3Smrg vs_output_param_offset[out] = i; 29717ec681f3Smrg } 29727ec681f3Smrg 29737ec681f3Smrg /* Change the PARAM offset in the instruction. */ 29747ec681f3Smrg LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET, 29757ec681f3Smrg LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_PARAM + i, 0)); 29767ec681f3Smrg } 29777ec681f3Smrg *num_param_exports = exports.num; 29787ec681f3Smrg } 29797ec681f3Smrg} 29807ec681f3Smrg 29817ec681f3Smrgvoid ac_init_exec_full_mask(struct ac_llvm_context *ctx) 29827ec681f3Smrg{ 29837ec681f3Smrg LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0); 29847ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.init.exec", ctx->voidt, &full_mask, 1, 29857ec681f3Smrg AC_FUNC_ATTR_CONVERGENT); 29867ec681f3Smrg} 29877ec681f3Smrg 29887ec681f3Smrgvoid ac_declare_lds_as_pointer(struct ac_llvm_context *ctx) 29897ec681f3Smrg{ 29907ec681f3Smrg unsigned lds_size = ctx->chip_class >= GFX7 ? 
65536 : 32768; 29917ec681f3Smrg ctx->lds = LLVMBuildIntToPtr( 29927ec681f3Smrg ctx->builder, ctx->i32_0, 29937ec681f3Smrg LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS), "lds"); 29947ec681f3Smrg} 29957ec681f3Smrg 29967ec681f3SmrgLLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, LLVMValueRef dw_addr) 29977ec681f3Smrg{ 29987ec681f3Smrg return LLVMBuildLoad(ctx->builder, ac_build_gep0(ctx, ctx->lds, dw_addr), ""); 29997ec681f3Smrg} 30007ec681f3Smrg 30017ec681f3Smrgvoid ac_lds_store(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value) 30027ec681f3Smrg{ 30037ec681f3Smrg value = ac_to_integer(ctx, value); 30047ec681f3Smrg ac_build_indexed_store(ctx, ctx->lds, dw_addr, value); 30057ec681f3Smrg} 30067ec681f3Smrg 30077ec681f3SmrgLLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0) 30087ec681f3Smrg{ 30097ec681f3Smrg unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); 30107ec681f3Smrg const char *intrin_name; 30117ec681f3Smrg LLVMTypeRef type; 30127ec681f3Smrg LLVMValueRef zero; 30137ec681f3Smrg 30147ec681f3Smrg switch (src0_bitsize) { 30157ec681f3Smrg case 64: 30167ec681f3Smrg intrin_name = "llvm.cttz.i64"; 30177ec681f3Smrg type = ctx->i64; 30187ec681f3Smrg zero = ctx->i64_0; 30197ec681f3Smrg break; 30207ec681f3Smrg case 32: 30217ec681f3Smrg intrin_name = "llvm.cttz.i32"; 30227ec681f3Smrg type = ctx->i32; 30237ec681f3Smrg zero = ctx->i32_0; 30247ec681f3Smrg break; 30257ec681f3Smrg case 16: 30267ec681f3Smrg intrin_name = "llvm.cttz.i16"; 30277ec681f3Smrg type = ctx->i16; 30287ec681f3Smrg zero = ctx->i16_0; 30297ec681f3Smrg break; 30307ec681f3Smrg case 8: 30317ec681f3Smrg intrin_name = "llvm.cttz.i8"; 30327ec681f3Smrg type = ctx->i8; 30337ec681f3Smrg zero = ctx->i8_0; 30347ec681f3Smrg break; 30357ec681f3Smrg default: 30367ec681f3Smrg unreachable(!"invalid bitsize"); 30377ec681f3Smrg } 30387ec681f3Smrg 30397ec681f3Smrg LLVMValueRef params[2] = { 30407ec681f3Smrg src0, 
30417ec681f3Smrg 30427ec681f3Smrg /* The value of 1 means that ffs(x=0) = undef, so LLVM won't 30437ec681f3Smrg * add special code to check for x=0. The reason is that 30447ec681f3Smrg * the LLVM behavior for x=0 is different from what we 30457ec681f3Smrg * need here. However, LLVM also assumes that ffs(x) is 30467ec681f3Smrg * in [0, 31], but GLSL expects that ffs(0) = -1, so 30477ec681f3Smrg * a conditional assignment to handle 0 is still required. 30487ec681f3Smrg * 30497ec681f3Smrg * The hardware already implements the correct behavior. 30507ec681f3Smrg */ 30517ec681f3Smrg ctx->i1true, 30527ec681f3Smrg }; 30537ec681f3Smrg 30547ec681f3Smrg LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE); 30557ec681f3Smrg 30567ec681f3Smrg if (src0_bitsize == 64) { 30577ec681f3Smrg lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, ""); 30587ec681f3Smrg } else if (src0_bitsize < 32) { 30597ec681f3Smrg lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, ""); 30607ec681f3Smrg } 30617ec681f3Smrg 30627ec681f3Smrg /* TODO: We need an intrinsic to skip this conditional. 
*/ 30637ec681f3Smrg /* Check for zero: */ 30647ec681f3Smrg return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, zero, ""), 30657ec681f3Smrg LLVMConstInt(ctx->i32, -1, 0), lsb, ""); 30667ec681f3Smrg} 30677ec681f3Smrg 30687ec681f3SmrgLLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type) 30697ec681f3Smrg{ 30707ec681f3Smrg return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST); 30717ec681f3Smrg} 30727ec681f3Smrg 30737ec681f3SmrgLLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) 30747ec681f3Smrg{ 30757ec681f3Smrg return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST_32BIT); 30767ec681f3Smrg} 30777ec681f3Smrg 30787ec681f3Smrgstatic struct ac_llvm_flow *get_current_flow(struct ac_llvm_context *ctx) 30797ec681f3Smrg{ 30807ec681f3Smrg if (ctx->flow->depth > 0) 30817ec681f3Smrg return &ctx->flow->stack[ctx->flow->depth - 1]; 30827ec681f3Smrg return NULL; 30837ec681f3Smrg} 30847ec681f3Smrg 30857ec681f3Smrgstatic struct ac_llvm_flow *get_innermost_loop(struct ac_llvm_context *ctx) 30867ec681f3Smrg{ 30877ec681f3Smrg for (unsigned i = ctx->flow->depth; i > 0; --i) { 30887ec681f3Smrg if (ctx->flow->stack[i - 1].loop_entry_block) 30897ec681f3Smrg return &ctx->flow->stack[i - 1]; 30907ec681f3Smrg } 30917ec681f3Smrg return NULL; 30927ec681f3Smrg} 30937ec681f3Smrg 30947ec681f3Smrgstatic struct ac_llvm_flow *push_flow(struct ac_llvm_context *ctx) 30957ec681f3Smrg{ 30967ec681f3Smrg struct ac_llvm_flow *flow; 30977ec681f3Smrg 30987ec681f3Smrg if (ctx->flow->depth >= ctx->flow->depth_max) { 30997ec681f3Smrg unsigned new_max = MAX2(ctx->flow->depth << 1, AC_LLVM_INITIAL_CF_DEPTH); 31007ec681f3Smrg 31017ec681f3Smrg ctx->flow->stack = realloc(ctx->flow->stack, new_max * sizeof(*ctx->flow->stack)); 31027ec681f3Smrg ctx->flow->depth_max = new_max; 31037ec681f3Smrg } 31047ec681f3Smrg 31057ec681f3Smrg flow = &ctx->flow->stack[ctx->flow->depth]; 31067ec681f3Smrg ctx->flow->depth++; 31077ec681f3Smrg 31087ec681f3Smrg flow->next_block = NULL; 
31097ec681f3Smrg flow->loop_entry_block = NULL; 31107ec681f3Smrg return flow; 31117ec681f3Smrg} 31127ec681f3Smrg 31137ec681f3Smrgstatic void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int label_id) 31147ec681f3Smrg{ 31157ec681f3Smrg char buf[32]; 31167ec681f3Smrg snprintf(buf, sizeof(buf), "%s%d", base, label_id); 31177ec681f3Smrg LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf); 31187ec681f3Smrg} 31197ec681f3Smrg 31207ec681f3Smrg/* Append a basic block at the level of the parent flow. 31217ec681f3Smrg */ 31227ec681f3Smrgstatic LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx, const char *name) 31237ec681f3Smrg{ 31247ec681f3Smrg assert(ctx->flow->depth >= 1); 31257ec681f3Smrg 31267ec681f3Smrg if (ctx->flow->depth >= 2) { 31277ec681f3Smrg struct ac_llvm_flow *flow = &ctx->flow->stack[ctx->flow->depth - 2]; 31287ec681f3Smrg 31297ec681f3Smrg return LLVMInsertBasicBlockInContext(ctx->context, flow->next_block, name); 31307ec681f3Smrg } 31317ec681f3Smrg 31327ec681f3Smrg LLVMValueRef main_fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)); 31337ec681f3Smrg return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name); 31347ec681f3Smrg} 31357ec681f3Smrg 31367ec681f3Smrg/* Emit a branch to the given default target for the current block if 31377ec681f3Smrg * applicable -- that is, if the current block does not already contain a 31387ec681f3Smrg * branch from a break or continue. 
31397ec681f3Smrg */ 31407ec681f3Smrgstatic void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target) 31417ec681f3Smrg{ 31427ec681f3Smrg if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder))) 31437ec681f3Smrg LLVMBuildBr(builder, target); 31447ec681f3Smrg} 31457ec681f3Smrg 31467ec681f3Smrgvoid ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id) 31477ec681f3Smrg{ 31487ec681f3Smrg struct ac_llvm_flow *flow = push_flow(ctx); 31497ec681f3Smrg flow->loop_entry_block = append_basic_block(ctx, "LOOP"); 31507ec681f3Smrg flow->next_block = append_basic_block(ctx, "ENDLOOP"); 31517ec681f3Smrg set_basicblock_name(flow->loop_entry_block, "loop", label_id); 31527ec681f3Smrg LLVMBuildBr(ctx->builder, flow->loop_entry_block); 31537ec681f3Smrg LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block); 31547ec681f3Smrg} 31557ec681f3Smrg 31567ec681f3Smrgvoid ac_build_break(struct ac_llvm_context *ctx) 31577ec681f3Smrg{ 31587ec681f3Smrg struct ac_llvm_flow *flow = get_innermost_loop(ctx); 31597ec681f3Smrg LLVMBuildBr(ctx->builder, flow->next_block); 31607ec681f3Smrg} 31617ec681f3Smrg 31627ec681f3Smrgvoid ac_build_continue(struct ac_llvm_context *ctx) 31637ec681f3Smrg{ 31647ec681f3Smrg struct ac_llvm_flow *flow = get_innermost_loop(ctx); 31657ec681f3Smrg LLVMBuildBr(ctx->builder, flow->loop_entry_block); 31667ec681f3Smrg} 31677ec681f3Smrg 31687ec681f3Smrgvoid ac_build_else(struct ac_llvm_context *ctx, int label_id) 31697ec681f3Smrg{ 31707ec681f3Smrg struct ac_llvm_flow *current_branch = get_current_flow(ctx); 31717ec681f3Smrg LLVMBasicBlockRef endif_block; 31727ec681f3Smrg 31737ec681f3Smrg assert(!current_branch->loop_entry_block); 31747ec681f3Smrg 31757ec681f3Smrg endif_block = append_basic_block(ctx, "ENDIF"); 31767ec681f3Smrg emit_default_branch(ctx->builder, endif_block); 31777ec681f3Smrg 31787ec681f3Smrg LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); 31797ec681f3Smrg set_basicblock_name(current_branch->next_block, "else", 
label_id); 31807ec681f3Smrg 31817ec681f3Smrg current_branch->next_block = endif_block; 31827ec681f3Smrg} 31837ec681f3Smrg 31847ec681f3Smrg/* Invoked after a branch is exited. */ 31857ec681f3Smrgstatic void ac_branch_exited(struct ac_llvm_context *ctx) 31867ec681f3Smrg{ 31877ec681f3Smrg if (ctx->flow->depth == 0 && ctx->conditional_demote_seen) { 31887ec681f3Smrg /* The previous conditional branch contained demote. Kill threads 31897ec681f3Smrg * after all conditional blocks because amdgcn.wqm.vote doesn't 31907ec681f3Smrg * return usable values inside the blocks. 31917ec681f3Smrg * 31927ec681f3Smrg * This is an optional optimization that only kills whole inactive quads. 31937ec681f3Smrg */ 31947ec681f3Smrg LLVMValueRef cond = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, ""); 31957ec681f3Smrg ac_build_kill_if_false(ctx, ac_build_wqm_vote(ctx, cond)); 31967ec681f3Smrg ctx->conditional_demote_seen = false; 31977ec681f3Smrg } 31987ec681f3Smrg} 31997ec681f3Smrg 32007ec681f3Smrgvoid ac_build_endif(struct ac_llvm_context *ctx, int label_id) 32017ec681f3Smrg{ 32027ec681f3Smrg struct ac_llvm_flow *current_branch = get_current_flow(ctx); 32037ec681f3Smrg 32047ec681f3Smrg assert(!current_branch->loop_entry_block); 32057ec681f3Smrg 32067ec681f3Smrg emit_default_branch(ctx->builder, current_branch->next_block); 32077ec681f3Smrg LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); 32087ec681f3Smrg set_basicblock_name(current_branch->next_block, "endif", label_id); 32097ec681f3Smrg 32107ec681f3Smrg ctx->flow->depth--; 32117ec681f3Smrg ac_branch_exited(ctx); 32127ec681f3Smrg} 32137ec681f3Smrg 32147ec681f3Smrgvoid ac_build_endloop(struct ac_llvm_context *ctx, int label_id) 32157ec681f3Smrg{ 32167ec681f3Smrg struct ac_llvm_flow *current_loop = get_current_flow(ctx); 32177ec681f3Smrg 32187ec681f3Smrg assert(current_loop->loop_entry_block); 32197ec681f3Smrg 32207ec681f3Smrg emit_default_branch(ctx->builder, current_loop->loop_entry_block); 32217ec681f3Smrg 
32227ec681f3Smrg LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block); 32237ec681f3Smrg set_basicblock_name(current_loop->next_block, "endloop", label_id); 32247ec681f3Smrg ctx->flow->depth--; 32257ec681f3Smrg ac_branch_exited(ctx); 32267ec681f3Smrg} 32277ec681f3Smrg 32287ec681f3Smrgvoid ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id) 32297ec681f3Smrg{ 32307ec681f3Smrg struct ac_llvm_flow *flow = push_flow(ctx); 32317ec681f3Smrg LLVMBasicBlockRef if_block; 32327ec681f3Smrg 32337ec681f3Smrg if_block = append_basic_block(ctx, "IF"); 32347ec681f3Smrg flow->next_block = append_basic_block(ctx, "ELSE"); 32357ec681f3Smrg set_basicblock_name(if_block, "if", label_id); 32367ec681f3Smrg LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block); 32377ec681f3Smrg LLVMPositionBuilderAtEnd(ctx->builder, if_block); 32387ec681f3Smrg} 32397ec681f3Smrg 32407ec681f3SmrgLLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name) 32417ec681f3Smrg{ 32427ec681f3Smrg LLVMBuilderRef builder = ac->builder; 32437ec681f3Smrg LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); 32447ec681f3Smrg LLVMValueRef function = LLVMGetBasicBlockParent(current_block); 32457ec681f3Smrg LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); 32467ec681f3Smrg LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); 32477ec681f3Smrg LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context); 32487ec681f3Smrg LLVMValueRef res; 32497ec681f3Smrg 32507ec681f3Smrg if (first_instr) { 32517ec681f3Smrg LLVMPositionBuilderBefore(first_builder, first_instr); 32527ec681f3Smrg } else { 32537ec681f3Smrg LLVMPositionBuilderAtEnd(first_builder, first_block); 32547ec681f3Smrg } 32557ec681f3Smrg 32567ec681f3Smrg res = LLVMBuildAlloca(first_builder, type, name); 32577ec681f3Smrg LLVMDisposeBuilder(first_builder); 32587ec681f3Smrg return res; 32597ec681f3Smrg} 32607ec681f3Smrg 
32617ec681f3SmrgLLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name) 32627ec681f3Smrg{ 32637ec681f3Smrg LLVMValueRef ptr = ac_build_alloca_undef(ac, type, name); 32647ec681f3Smrg LLVMBuildStore(ac->builder, LLVMConstNull(type), ptr); 32657ec681f3Smrg return ptr; 32667ec681f3Smrg} 32677ec681f3Smrg 32687ec681f3SmrgLLVMValueRef ac_build_alloca_init(struct ac_llvm_context *ac, LLVMValueRef val, const char *name) 32697ec681f3Smrg{ 32707ec681f3Smrg LLVMValueRef ptr = ac_build_alloca_undef(ac, LLVMTypeOf(val), name); 32717ec681f3Smrg LLVMBuildStore(ac->builder, val, ptr); 32727ec681f3Smrg return ptr; 32737ec681f3Smrg} 32747ec681f3Smrg 32757ec681f3SmrgLLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMTypeRef type) 32767ec681f3Smrg{ 32777ec681f3Smrg int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); 32787ec681f3Smrg return LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), ""); 32797ec681f3Smrg} 32807ec681f3Smrg 32817ec681f3SmrgLLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count) 32827ec681f3Smrg{ 32837ec681f3Smrg unsigned num_components = ac_get_llvm_num_components(value); 32847ec681f3Smrg if (count == num_components) 32857ec681f3Smrg return value; 32867ec681f3Smrg 32877ec681f3Smrg LLVMValueRef *const masks = alloca(MAX2(count, 2) * sizeof(LLVMValueRef)); 32887ec681f3Smrg masks[0] = ctx->i32_0; 32897ec681f3Smrg masks[1] = ctx->i32_1; 32907ec681f3Smrg for (unsigned i = 2; i < count; i++) 32917ec681f3Smrg masks[i] = LLVMConstInt(ctx->i32, i, false); 32927ec681f3Smrg 32937ec681f3Smrg if (count == 1) 32947ec681f3Smrg return LLVMBuildExtractElement(ctx->builder, value, masks[0], ""); 32957ec681f3Smrg 32967ec681f3Smrg LLVMValueRef swizzle = LLVMConstVector(masks, count); 32977ec681f3Smrg return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, ""); 32987ec681f3Smrg} 32997ec681f3Smrg 33007ec681f3Smrg/* If param is i64 and bitwidth <= 
32, the return value will be i32. */ 33017ec681f3SmrgLLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift, 33027ec681f3Smrg unsigned bitwidth) 33037ec681f3Smrg{ 33047ec681f3Smrg LLVMValueRef value = param; 33057ec681f3Smrg if (rshift) 33067ec681f3Smrg value = LLVMBuildLShr(ctx->builder, value, LLVMConstInt(LLVMTypeOf(param), rshift, false), ""); 33077ec681f3Smrg 33087ec681f3Smrg if (rshift + bitwidth < 32) { 33097ec681f3Smrg uint64_t mask = (1ull << bitwidth) - 1; 33107ec681f3Smrg value = LLVMBuildAnd(ctx->builder, value, LLVMConstInt(LLVMTypeOf(param), mask, false), ""); 33117ec681f3Smrg } 33127ec681f3Smrg 33137ec681f3Smrg if (bitwidth <= 32 && LLVMTypeOf(param) == ctx->i64) 33147ec681f3Smrg value = LLVMBuildTrunc(ctx->builder, value, ctx->i32, ""); 33157ec681f3Smrg return value; 33167ec681f3Smrg} 33177ec681f3Smrg 33187ec681f3Smrg/* Adjust the sample index according to FMASK. 33197ec681f3Smrg * 33207ec681f3Smrg * For uncompressed MSAA surfaces, FMASK should return 0x76543210, 33217ec681f3Smrg * which is the identity mapping. Each nibble says which physical sample 33227ec681f3Smrg * should be fetched to get that sample. 33237ec681f3Smrg * 33247ec681f3Smrg * For example, 0x11111100 means there are only 2 samples stored and 33257ec681f3Smrg * the second sample covers 3/4 of the pixel. When reading samples 0 33267ec681f3Smrg * and 1, return physical sample 0 (determined by the first two 0s 33277ec681f3Smrg * in FMASK), otherwise return physical sample 1. 
33287ec681f3Smrg * 33297ec681f3Smrg * The sample index should be adjusted as follows: 33307ec681f3Smrg * addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF; 33317ec681f3Smrg */ 33327ec681f3Smrgvoid ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr, 33337ec681f3Smrg bool is_array_tex) 33347ec681f3Smrg{ 33357ec681f3Smrg struct ac_image_args fmask_load = {0}; 33367ec681f3Smrg fmask_load.opcode = ac_image_load; 33377ec681f3Smrg fmask_load.resource = fmask; 33387ec681f3Smrg fmask_load.dmask = 0xf; 33397ec681f3Smrg fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d; 33407ec681f3Smrg fmask_load.attributes = AC_FUNC_ATTR_READNONE; 33417ec681f3Smrg 33427ec681f3Smrg fmask_load.coords[0] = addr[0]; 33437ec681f3Smrg fmask_load.coords[1] = addr[1]; 33447ec681f3Smrg if (is_array_tex) 33457ec681f3Smrg fmask_load.coords[2] = addr[2]; 33467ec681f3Smrg fmask_load.a16 = ac_get_elem_bits(ac, LLVMTypeOf(addr[0])) == 16; 33477ec681f3Smrg 33487ec681f3Smrg LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load); 33497ec681f3Smrg fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, ""); 33507ec681f3Smrg 33517ec681f3Smrg /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK 33527ec681f3Smrg * resource descriptor is 0 (invalid). 33537ec681f3Smrg */ 33547ec681f3Smrg LLVMValueRef tmp; 33557ec681f3Smrg tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, ""); 33567ec681f3Smrg tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, ""); 33577ec681f3Smrg tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, ""); 33587ec681f3Smrg fmask_value = 33597ec681f3Smrg LLVMBuildSelect(ac->builder, tmp, fmask_value, LLVMConstInt(ac->i32, 0x76543210, false), ""); 33607ec681f3Smrg 33617ec681f3Smrg /* Apply the formula. */ 33627ec681f3Smrg unsigned sample_chan = is_array_tex ? 
3 : 2; 33637ec681f3Smrg LLVMValueRef final_sample; 33647ec681f3Smrg final_sample = LLVMBuildMul(ac->builder, addr[sample_chan], 33657ec681f3Smrg LLVMConstInt(LLVMTypeOf(addr[0]), 4, 0), ""); 33667ec681f3Smrg final_sample = LLVMBuildLShr(ac->builder, fmask_value, 33677ec681f3Smrg LLVMBuildZExt(ac->builder, final_sample, ac->i32, ""), ""); 33687ec681f3Smrg /* Mask the sample index by 0x7, because 0x8 means an unknown value 33697ec681f3Smrg * with EQAA, so those will map to 0. */ 33707ec681f3Smrg addr[sample_chan] = LLVMBuildAnd(ac->builder, final_sample, LLVMConstInt(ac->i32, 0x7, 0), ""); 33717ec681f3Smrg if (fmask_load.a16) 33727ec681f3Smrg addr[sample_chan] = LLVMBuildTrunc(ac->builder, final_sample, ac->i16, ""); 33737ec681f3Smrg} 33747ec681f3Smrg 33757ec681f3Smrgstatic LLVMValueRef _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, 33767ec681f3Smrg LLVMValueRef lane, bool with_opt_barrier) 33777ec681f3Smrg{ 33787ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(src); 33797ec681f3Smrg LLVMValueRef result; 33807ec681f3Smrg 33817ec681f3Smrg if (with_opt_barrier) 33827ec681f3Smrg ac_build_optimization_barrier(ctx, &src, false); 33837ec681f3Smrg 33847ec681f3Smrg src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); 33857ec681f3Smrg if (lane) 33867ec681f3Smrg lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, ""); 33877ec681f3Smrg 33887ec681f3Smrg result = 33897ec681f3Smrg ac_build_intrinsic(ctx, lane == NULL ? "llvm.amdgcn.readfirstlane" : "llvm.amdgcn.readlane", 33907ec681f3Smrg ctx->i32, (LLVMValueRef[]){src, lane}, lane == NULL ? 
1 : 2, 33917ec681f3Smrg AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); 33927ec681f3Smrg 33937ec681f3Smrg return LLVMBuildTrunc(ctx->builder, result, type, ""); 33947ec681f3Smrg} 33957ec681f3Smrg 33967ec681f3Smrgstatic LLVMValueRef ac_build_readlane_common(struct ac_llvm_context *ctx, LLVMValueRef src, 33977ec681f3Smrg LLVMValueRef lane, bool with_opt_barrier) 33987ec681f3Smrg{ 33997ec681f3Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 34007ec681f3Smrg src = ac_to_integer(ctx, src); 34017ec681f3Smrg unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); 34027ec681f3Smrg LLVMValueRef ret; 34037ec681f3Smrg 34047ec681f3Smrg if (bits > 32) { 34057ec681f3Smrg assert(bits % 32 == 0); 34067ec681f3Smrg LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); 34077ec681f3Smrg LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, ""); 34087ec681f3Smrg ret = LLVMGetUndef(vec_type); 34097ec681f3Smrg for (unsigned i = 0; i < bits / 32; i++) { 34107ec681f3Smrg LLVMValueRef ret_comp; 34117ec681f3Smrg 34127ec681f3Smrg src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), ""); 34137ec681f3Smrg 34147ec681f3Smrg ret_comp = _ac_build_readlane(ctx, src, lane, with_opt_barrier); 34157ec681f3Smrg 34167ec681f3Smrg ret = 34177ec681f3Smrg LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), ""); 34187ec681f3Smrg } 34197ec681f3Smrg } else { 34207ec681f3Smrg ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier); 34217ec681f3Smrg } 34227ec681f3Smrg 34237ec681f3Smrg if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind) 34247ec681f3Smrg return LLVMBuildIntToPtr(ctx->builder, ret, src_type, ""); 34257ec681f3Smrg return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); 34267ec681f3Smrg} 34277ec681f3Smrg 34287ec681f3Smrg/** 34297ec681f3Smrg * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic. 
34307ec681f3Smrg * 34317ec681f3Smrg * The optimization barrier is not needed if the value is the same in all lanes 34327ec681f3Smrg * or if this is called in the outermost block. 34337ec681f3Smrg * 34347ec681f3Smrg * @param ctx 34357ec681f3Smrg * @param src 34367ec681f3Smrg * @param lane - id of the lane or NULL for the first active lane 34377ec681f3Smrg * @return value of the lane 34387ec681f3Smrg */ 34397ec681f3SmrgLLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, LLVMValueRef src, 34407ec681f3Smrg LLVMValueRef lane) 34417ec681f3Smrg{ 34427ec681f3Smrg return ac_build_readlane_common(ctx, src, lane, false); 34437ec681f3Smrg} 34447ec681f3Smrg 34457ec681f3SmrgLLVMValueRef ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane) 34467ec681f3Smrg{ 34477ec681f3Smrg return ac_build_readlane_common(ctx, src, lane, true); 34487ec681f3Smrg} 34497ec681f3Smrg 34507ec681f3SmrgLLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, 34517ec681f3Smrg LLVMValueRef lane) 34527ec681f3Smrg{ 34537ec681f3Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32, 34547ec681f3Smrg (LLVMValueRef[]){value, lane, src}, 3, 34557ec681f3Smrg AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); 34567ec681f3Smrg} 34577ec681f3Smrg 34587ec681f3SmrgLLVMValueRef ac_build_mbcnt_add(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef add_src) 34597ec681f3Smrg{ 34607ec681f3Smrg if (ctx->wave_size == 32) { 34617ec681f3Smrg LLVMValueRef val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32, 34627ec681f3Smrg (LLVMValueRef[]){mask, ctx->i32_0}, 2, AC_FUNC_ATTR_READNONE); 34637ec681f3Smrg ac_set_range_metadata(ctx, val, 0, ctx->wave_size); 34647ec681f3Smrg return val; 34657ec681f3Smrg } 34667ec681f3Smrg LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, ctx->v2i32, ""); 34677ec681f3Smrg LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec, ctx->i32_0, ""); 
/* Control values passed as the dpp_ctrl operand of the DPP intrinsic.
 * Entries with a leading underscore are base values that the helper
 * functions below combine with a lane selection or shift amount.
 */
enum dpp_ctrl
{
   _dpp_quad_perm = 0x000,
   _dpp_row_sl = 0x100,
   _dpp_row_sr = 0x110,
   _dpp_row_rr = 0x120,
   dpp_wf_sl1 = 0x130,
   dpp_wf_rl1 = 0x134,
   dpp_wf_sr1 = 0x138,
   dpp_wf_rr1 = 0x13C,
   dpp_row_mirror = 0x140,
   dpp_row_half_mirror = 0x141,
   dpp_row_bcast15 = 0x142,
   dpp_row_bcast31 = 0x143
};

/* Pack four 2-bit lane selectors (each 0..3) into a quad_perm control value;
 * lane0 occupies the lowest two bits.
 */
static inline enum dpp_ctrl dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2,
                                          unsigned lane3)
{
   unsigned ctrl = _dpp_quad_perm;

   assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
   ctrl |= lane0;
   ctrl |= lane1 << 2;
   ctrl |= lane2 << 4;
   ctrl |= lane3 << 6;
   return (enum dpp_ctrl)ctrl;
}

/* row_sl control value for a shift amount of 1..15. */
static inline enum dpp_ctrl dpp_row_sl(unsigned amount)
{
   assert(amount > 0 && amount < 16);
   unsigned ctrl = _dpp_row_sl;
   ctrl |= amount;
   return (enum dpp_ctrl)ctrl;
}

/* row_sr control value for a shift amount of 1..15. */
static inline enum dpp_ctrl dpp_row_sr(unsigned amount)
{
   assert(amount > 0 && amount < 16);
   unsigned ctrl = _dpp_row_sr;
   ctrl |= amount;
   return (enum dpp_ctrl)ctrl;
}
dpp_row_sr(unsigned amount) 35137ec681f3Smrg{ 35147ec681f3Smrg assert(amount > 0 && amount < 16); 35157ec681f3Smrg return _dpp_row_sr | amount; 35167ec681f3Smrg} 35177ec681f3Smrg 35187ec681f3Smrgstatic LLVMValueRef _ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src, 35197ec681f3Smrg enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask, 35207ec681f3Smrg bool bound_ctrl) 35217ec681f3Smrg{ 35227ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(src); 35237ec681f3Smrg LLVMValueRef res; 35247ec681f3Smrg 35257ec681f3Smrg old = LLVMBuildZExt(ctx->builder, old, ctx->i32, ""); 35267ec681f3Smrg src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); 35277ec681f3Smrg 35287ec681f3Smrg res = ac_build_intrinsic( 35297ec681f3Smrg ctx, "llvm.amdgcn.update.dpp.i32", ctx->i32, 35307ec681f3Smrg (LLVMValueRef[]){old, src, LLVMConstInt(ctx->i32, dpp_ctrl, 0), 35317ec681f3Smrg LLVMConstInt(ctx->i32, row_mask, 0), LLVMConstInt(ctx->i32, bank_mask, 0), 35327ec681f3Smrg LLVMConstInt(ctx->i1, bound_ctrl, 0)}, 35337ec681f3Smrg 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); 35347ec681f3Smrg 35357ec681f3Smrg return LLVMBuildTrunc(ctx->builder, res, type, ""); 35367ec681f3Smrg} 35377ec681f3Smrg 35387ec681f3Smrgstatic LLVMValueRef ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src, 35397ec681f3Smrg enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask, 35407ec681f3Smrg bool bound_ctrl) 35417ec681f3Smrg{ 35427ec681f3Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 35437ec681f3Smrg src = ac_to_integer(ctx, src); 35447ec681f3Smrg old = ac_to_integer(ctx, old); 35457ec681f3Smrg unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); 35467ec681f3Smrg LLVMValueRef ret; 35477ec681f3Smrg if (bits > 32) { 35487ec681f3Smrg assert(bits % 32 == 0); 35497ec681f3Smrg LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); 35507ec681f3Smrg LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, ""); 35517ec681f3Smrg LLVMValueRef 
old_vector = LLVMBuildBitCast(ctx->builder, old, vec_type, ""); 35527ec681f3Smrg ret = LLVMGetUndef(vec_type); 35537ec681f3Smrg for (unsigned i = 0; i < bits / 32; i++) { 35547ec681f3Smrg src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), ""); 35557ec681f3Smrg old = LLVMBuildExtractElement(ctx->builder, old_vector, LLVMConstInt(ctx->i32, i, 0), ""); 35567ec681f3Smrg LLVMValueRef ret_comp = 35577ec681f3Smrg _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); 35587ec681f3Smrg ret = 35597ec681f3Smrg LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), ""); 35607ec681f3Smrg } 35617ec681f3Smrg } else { 35627ec681f3Smrg ret = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); 35637ec681f3Smrg } 35647ec681f3Smrg return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); 35657ec681f3Smrg} 35667ec681f3Smrg 35677ec681f3Smrgstatic LLVMValueRef _ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, 35687ec681f3Smrg uint64_t sel, bool exchange_rows, bool bound_ctrl) 35697ec681f3Smrg{ 35707ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(src); 35717ec681f3Smrg LLVMValueRef result; 35727ec681f3Smrg 35737ec681f3Smrg src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); 35747ec681f3Smrg 35757ec681f3Smrg LLVMValueRef args[6] = { 35767ec681f3Smrg src, 35777ec681f3Smrg src, 35787ec681f3Smrg LLVMConstInt(ctx->i32, sel, false), 35797ec681f3Smrg LLVMConstInt(ctx->i32, sel >> 32, false), 35807ec681f3Smrg ctx->i1true, /* fi */ 35817ec681f3Smrg bound_ctrl ? ctx->i1true : ctx->i1false, 35827ec681f3Smrg }; 35837ec681f3Smrg 35847ec681f3Smrg result = 35857ec681f3Smrg ac_build_intrinsic(ctx, exchange_rows ? 
"llvm.amdgcn.permlanex16" : "llvm.amdgcn.permlane16", 35867ec681f3Smrg ctx->i32, args, 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); 35877ec681f3Smrg 35887ec681f3Smrg return LLVMBuildTrunc(ctx->builder, result, type, ""); 35897ec681f3Smrg} 35907ec681f3Smrg 35917ec681f3Smrgstatic LLVMValueRef ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel, 35927ec681f3Smrg bool exchange_rows, bool bound_ctrl) 35937ec681f3Smrg{ 35947ec681f3Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 35957ec681f3Smrg src = ac_to_integer(ctx, src); 35967ec681f3Smrg unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); 35977ec681f3Smrg LLVMValueRef ret; 35987ec681f3Smrg if (bits > 32) { 35997ec681f3Smrg assert(bits % 32 == 0); 36007ec681f3Smrg LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); 36017ec681f3Smrg LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, ""); 36027ec681f3Smrg ret = LLVMGetUndef(vec_type); 36037ec681f3Smrg for (unsigned i = 0; i < bits / 32; i++) { 36047ec681f3Smrg src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), ""); 36057ec681f3Smrg LLVMValueRef ret_comp = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl); 36067ec681f3Smrg ret = 36077ec681f3Smrg LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), ""); 36087ec681f3Smrg } 36097ec681f3Smrg } else { 36107ec681f3Smrg ret = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl); 36117ec681f3Smrg } 36127ec681f3Smrg return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); 36137ec681f3Smrg} 36147ec681f3Smrg 36157ec681f3Smrgstatic inline unsigned ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask) 36167ec681f3Smrg{ 36177ec681f3Smrg assert(and_mask < 32 && or_mask < 32 && xor_mask < 32); 36187ec681f3Smrg return and_mask | (or_mask << 5) | (xor_mask << 10); 36197ec681f3Smrg} 36207ec681f3Smrg 36217ec681f3Smrgstatic LLVMValueRef _ac_build_ds_swizzle(struct 
ac_llvm_context *ctx, LLVMValueRef src, 36227ec681f3Smrg unsigned mask) 36237ec681f3Smrg{ 36247ec681f3Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 36257ec681f3Smrg LLVMValueRef ret; 36267ec681f3Smrg 36277ec681f3Smrg src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); 36287ec681f3Smrg 36297ec681f3Smrg ret = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle", ctx->i32, 36307ec681f3Smrg (LLVMValueRef[]){src, LLVMConstInt(ctx->i32, mask, 0)}, 2, 36317ec681f3Smrg AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); 36327ec681f3Smrg 36337ec681f3Smrg return LLVMBuildTrunc(ctx->builder, ret, src_type, ""); 36347ec681f3Smrg} 36357ec681f3Smrg 36367ec681f3SmrgLLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask) 36377ec681f3Smrg{ 36387ec681f3Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 36397ec681f3Smrg src = ac_to_integer(ctx, src); 36407ec681f3Smrg unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); 36417ec681f3Smrg LLVMValueRef ret; 36427ec681f3Smrg if (bits > 32) { 36437ec681f3Smrg assert(bits % 32 == 0); 36447ec681f3Smrg LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); 36457ec681f3Smrg LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, ""); 36467ec681f3Smrg ret = LLVMGetUndef(vec_type); 36477ec681f3Smrg for (unsigned i = 0; i < bits / 32; i++) { 36487ec681f3Smrg src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), ""); 36497ec681f3Smrg LLVMValueRef ret_comp = _ac_build_ds_swizzle(ctx, src, mask); 36507ec681f3Smrg ret = 36517ec681f3Smrg LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), ""); 36527ec681f3Smrg } 36537ec681f3Smrg } else { 36547ec681f3Smrg ret = _ac_build_ds_swizzle(ctx, src, mask); 36557ec681f3Smrg } 36567ec681f3Smrg return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); 36577ec681f3Smrg} 36587ec681f3Smrg 36597ec681f3Smrgstatic LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src) 36607ec681f3Smrg{ 
36617ec681f3Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 36627ec681f3Smrg unsigned bitsize = ac_get_elem_bits(ctx, src_type); 36637ec681f3Smrg char name[32], type[8]; 36647ec681f3Smrg LLVMValueRef ret; 36657ec681f3Smrg 36667ec681f3Smrg src = ac_to_integer(ctx, src); 36677ec681f3Smrg 36687ec681f3Smrg if (bitsize < 32) 36697ec681f3Smrg src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); 36707ec681f3Smrg 36717ec681f3Smrg ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type)); 36727ec681f3Smrg snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type); 36737ec681f3Smrg ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1, 36747ec681f3Smrg AC_FUNC_ATTR_READNONE); 36757ec681f3Smrg 36767ec681f3Smrg if (bitsize < 32) 36777ec681f3Smrg ret = LLVMBuildTrunc(ctx->builder, ret, ac_to_integer_type(ctx, src_type), ""); 36787ec681f3Smrg 36797ec681f3Smrg return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); 36807ec681f3Smrg} 36817ec681f3Smrg 36827ec681f3Smrgstatic LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src, 36837ec681f3Smrg LLVMValueRef inactive) 36847ec681f3Smrg{ 36857ec681f3Smrg char name[33], type[8]; 36867ec681f3Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 36877ec681f3Smrg unsigned bitsize = ac_get_elem_bits(ctx, src_type); 36887ec681f3Smrg src = ac_to_integer(ctx, src); 36897ec681f3Smrg inactive = ac_to_integer(ctx, inactive); 36907ec681f3Smrg 36917ec681f3Smrg if (bitsize < 32) { 36927ec681f3Smrg src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); 36937ec681f3Smrg inactive = LLVMBuildZExt(ctx->builder, inactive, ctx->i32, ""); 36947ec681f3Smrg } 36957ec681f3Smrg 36967ec681f3Smrg ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type)); 36977ec681f3Smrg snprintf(name, sizeof(name), "llvm.amdgcn.set.inactive.%s", type); 36987ec681f3Smrg LLVMValueRef ret = 36997ec681f3Smrg ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src, inactive}, 2, 37007ec681f3Smrg AC_FUNC_ATTR_READNONE | 
AC_FUNC_ATTR_CONVERGENT); 37017ec681f3Smrg if (bitsize < 32) 37027ec681f3Smrg ret = LLVMBuildTrunc(ctx->builder, ret, src_type, ""); 37037ec681f3Smrg 37047ec681f3Smrg return ret; 37057ec681f3Smrg} 37067ec681f3Smrg 37077ec681f3Smrgstatic LLVMValueRef get_reduction_identity(struct ac_llvm_context *ctx, nir_op op, 37087ec681f3Smrg unsigned type_size) 37097ec681f3Smrg{ 37107ec681f3Smrg 37117ec681f3Smrg if (type_size == 0) { 37127ec681f3Smrg switch (op) { 37137ec681f3Smrg case nir_op_ior: 37147ec681f3Smrg case nir_op_ixor: 37157ec681f3Smrg return LLVMConstInt(ctx->i1, 0, 0); 37167ec681f3Smrg case nir_op_iand: 37177ec681f3Smrg return LLVMConstInt(ctx->i1, 1, 0); 37187ec681f3Smrg default: 37197ec681f3Smrg unreachable("bad reduction intrinsic"); 37207ec681f3Smrg } 37217ec681f3Smrg } else if (type_size == 1) { 37227ec681f3Smrg switch (op) { 37237ec681f3Smrg case nir_op_iadd: 37247ec681f3Smrg return ctx->i8_0; 37257ec681f3Smrg case nir_op_imul: 37267ec681f3Smrg return ctx->i8_1; 37277ec681f3Smrg case nir_op_imin: 37287ec681f3Smrg return LLVMConstInt(ctx->i8, INT8_MAX, 0); 37297ec681f3Smrg case nir_op_umin: 37307ec681f3Smrg return LLVMConstInt(ctx->i8, UINT8_MAX, 0); 37317ec681f3Smrg case nir_op_imax: 37327ec681f3Smrg return LLVMConstInt(ctx->i8, INT8_MIN, 0); 37337ec681f3Smrg case nir_op_umax: 37347ec681f3Smrg return ctx->i8_0; 37357ec681f3Smrg case nir_op_iand: 37367ec681f3Smrg return LLVMConstInt(ctx->i8, -1, 0); 37377ec681f3Smrg case nir_op_ior: 37387ec681f3Smrg return ctx->i8_0; 37397ec681f3Smrg case nir_op_ixor: 37407ec681f3Smrg return ctx->i8_0; 37417ec681f3Smrg default: 37427ec681f3Smrg unreachable("bad reduction intrinsic"); 37437ec681f3Smrg } 37447ec681f3Smrg } else if (type_size == 2) { 37457ec681f3Smrg switch (op) { 37467ec681f3Smrg case nir_op_iadd: 37477ec681f3Smrg return ctx->i16_0; 37487ec681f3Smrg case nir_op_fadd: 37497ec681f3Smrg return ctx->f16_0; 37507ec681f3Smrg case nir_op_imul: 37517ec681f3Smrg return ctx->i16_1; 37527ec681f3Smrg case nir_op_fmul: 
37537ec681f3Smrg return ctx->f16_1; 37547ec681f3Smrg case nir_op_imin: 37557ec681f3Smrg return LLVMConstInt(ctx->i16, INT16_MAX, 0); 37567ec681f3Smrg case nir_op_umin: 37577ec681f3Smrg return LLVMConstInt(ctx->i16, UINT16_MAX, 0); 37587ec681f3Smrg case nir_op_fmin: 37597ec681f3Smrg return LLVMConstReal(ctx->f16, INFINITY); 37607ec681f3Smrg case nir_op_imax: 37617ec681f3Smrg return LLVMConstInt(ctx->i16, INT16_MIN, 0); 37627ec681f3Smrg case nir_op_umax: 37637ec681f3Smrg return ctx->i16_0; 37647ec681f3Smrg case nir_op_fmax: 37657ec681f3Smrg return LLVMConstReal(ctx->f16, -INFINITY); 37667ec681f3Smrg case nir_op_iand: 37677ec681f3Smrg return LLVMConstInt(ctx->i16, -1, 0); 37687ec681f3Smrg case nir_op_ior: 37697ec681f3Smrg return ctx->i16_0; 37707ec681f3Smrg case nir_op_ixor: 37717ec681f3Smrg return ctx->i16_0; 37727ec681f3Smrg default: 37737ec681f3Smrg unreachable("bad reduction intrinsic"); 37747ec681f3Smrg } 37757ec681f3Smrg } else if (type_size == 4) { 37767ec681f3Smrg switch (op) { 37777ec681f3Smrg case nir_op_iadd: 37787ec681f3Smrg return ctx->i32_0; 37797ec681f3Smrg case nir_op_fadd: 37807ec681f3Smrg return ctx->f32_0; 37817ec681f3Smrg case nir_op_imul: 37827ec681f3Smrg return ctx->i32_1; 37837ec681f3Smrg case nir_op_fmul: 37847ec681f3Smrg return ctx->f32_1; 37857ec681f3Smrg case nir_op_imin: 37867ec681f3Smrg return LLVMConstInt(ctx->i32, INT32_MAX, 0); 37877ec681f3Smrg case nir_op_umin: 37887ec681f3Smrg return LLVMConstInt(ctx->i32, UINT32_MAX, 0); 37897ec681f3Smrg case nir_op_fmin: 37907ec681f3Smrg return LLVMConstReal(ctx->f32, INFINITY); 37917ec681f3Smrg case nir_op_imax: 37927ec681f3Smrg return LLVMConstInt(ctx->i32, INT32_MIN, 0); 37937ec681f3Smrg case nir_op_umax: 37947ec681f3Smrg return ctx->i32_0; 37957ec681f3Smrg case nir_op_fmax: 37967ec681f3Smrg return LLVMConstReal(ctx->f32, -INFINITY); 37977ec681f3Smrg case nir_op_iand: 37987ec681f3Smrg return LLVMConstInt(ctx->i32, -1, 0); 37997ec681f3Smrg case nir_op_ior: 38007ec681f3Smrg return ctx->i32_0; 
38017ec681f3Smrg case nir_op_ixor: 38027ec681f3Smrg return ctx->i32_0; 38037ec681f3Smrg default: 38047ec681f3Smrg unreachable("bad reduction intrinsic"); 38057ec681f3Smrg } 38067ec681f3Smrg } else { /* type_size == 64bit */ 38077ec681f3Smrg switch (op) { 38087ec681f3Smrg case nir_op_iadd: 38097ec681f3Smrg return ctx->i64_0; 38107ec681f3Smrg case nir_op_fadd: 38117ec681f3Smrg return ctx->f64_0; 38127ec681f3Smrg case nir_op_imul: 38137ec681f3Smrg return ctx->i64_1; 38147ec681f3Smrg case nir_op_fmul: 38157ec681f3Smrg return ctx->f64_1; 38167ec681f3Smrg case nir_op_imin: 38177ec681f3Smrg return LLVMConstInt(ctx->i64, INT64_MAX, 0); 38187ec681f3Smrg case nir_op_umin: 38197ec681f3Smrg return LLVMConstInt(ctx->i64, UINT64_MAX, 0); 38207ec681f3Smrg case nir_op_fmin: 38217ec681f3Smrg return LLVMConstReal(ctx->f64, INFINITY); 38227ec681f3Smrg case nir_op_imax: 38237ec681f3Smrg return LLVMConstInt(ctx->i64, INT64_MIN, 0); 38247ec681f3Smrg case nir_op_umax: 38257ec681f3Smrg return ctx->i64_0; 38267ec681f3Smrg case nir_op_fmax: 38277ec681f3Smrg return LLVMConstReal(ctx->f64, -INFINITY); 38287ec681f3Smrg case nir_op_iand: 38297ec681f3Smrg return LLVMConstInt(ctx->i64, -1, 0); 38307ec681f3Smrg case nir_op_ior: 38317ec681f3Smrg return ctx->i64_0; 38327ec681f3Smrg case nir_op_ixor: 38337ec681f3Smrg return ctx->i64_0; 38347ec681f3Smrg default: 38357ec681f3Smrg unreachable("bad reduction intrinsic"); 38367ec681f3Smrg } 38377ec681f3Smrg } 38387ec681f3Smrg} 38397ec681f3Smrg 38407ec681f3Smrgstatic LLVMValueRef ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, 38417ec681f3Smrg nir_op op) 38427ec681f3Smrg{ 38437ec681f3Smrg bool _64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8; 38447ec681f3Smrg bool _32bit = ac_get_type_size(LLVMTypeOf(lhs)) == 4; 38457ec681f3Smrg switch (op) { 38467ec681f3Smrg case nir_op_iadd: 38477ec681f3Smrg return LLVMBuildAdd(ctx->builder, lhs, rhs, ""); 38487ec681f3Smrg case nir_op_fadd: 38497ec681f3Smrg return 
LLVMBuildFAdd(ctx->builder, lhs, rhs, ""); 38507ec681f3Smrg case nir_op_imul: 38517ec681f3Smrg return LLVMBuildMul(ctx->builder, lhs, rhs, ""); 38527ec681f3Smrg case nir_op_fmul: 38537ec681f3Smrg return LLVMBuildFMul(ctx->builder, lhs, rhs, ""); 38547ec681f3Smrg case nir_op_imin: 38557ec681f3Smrg return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""), 38567ec681f3Smrg lhs, rhs, ""); 38577ec681f3Smrg case nir_op_umin: 38587ec681f3Smrg return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""), 38597ec681f3Smrg lhs, rhs, ""); 38607ec681f3Smrg case nir_op_fmin: 38617ec681f3Smrg return ac_build_intrinsic( 38627ec681f3Smrg ctx, _64bit ? "llvm.minnum.f64" : _32bit ? "llvm.minnum.f32" : "llvm.minnum.f16", 38637ec681f3Smrg _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16, (LLVMValueRef[]){lhs, rhs}, 2, 38647ec681f3Smrg AC_FUNC_ATTR_READNONE); 38657ec681f3Smrg case nir_op_imax: 38667ec681f3Smrg return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""), 38677ec681f3Smrg lhs, rhs, ""); 38687ec681f3Smrg case nir_op_umax: 38697ec681f3Smrg return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""), 38707ec681f3Smrg lhs, rhs, ""); 38717ec681f3Smrg case nir_op_fmax: 38727ec681f3Smrg return ac_build_intrinsic( 38737ec681f3Smrg ctx, _64bit ? "llvm.maxnum.f64" : _32bit ? "llvm.maxnum.f32" : "llvm.maxnum.f16", 38747ec681f3Smrg _64bit ? ctx->f64 : _32bit ? 
ctx->f32 : ctx->f16, (LLVMValueRef[]){lhs, rhs}, 2, 38757ec681f3Smrg AC_FUNC_ATTR_READNONE); 38767ec681f3Smrg case nir_op_iand: 38777ec681f3Smrg return LLVMBuildAnd(ctx->builder, lhs, rhs, ""); 38787ec681f3Smrg case nir_op_ior: 38797ec681f3Smrg return LLVMBuildOr(ctx->builder, lhs, rhs, ""); 38807ec681f3Smrg case nir_op_ixor: 38817ec681f3Smrg return LLVMBuildXor(ctx->builder, lhs, rhs, ""); 38827ec681f3Smrg default: 38837ec681f3Smrg unreachable("bad reduction intrinsic"); 38847ec681f3Smrg } 38857ec681f3Smrg} 38867ec681f3Smrg 38877ec681f3Smrg/** 38887ec681f3Smrg * \param src The value to shift. 38897ec681f3Smrg * \param identity The value to use the first lane. 38907ec681f3Smrg * \param maxprefix specifies that the result only needs to be correct for a 38917ec681f3Smrg * prefix of this many threads 38927ec681f3Smrg * \return src, shifted 1 lane up, and identity shifted into lane 0. 38937ec681f3Smrg */ 38947ec681f3Smrgstatic LLVMValueRef ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVMValueRef src, 38957ec681f3Smrg LLVMValueRef identity, unsigned maxprefix) 38967ec681f3Smrg{ 38977ec681f3Smrg if (ctx->chip_class >= GFX10) { 38987ec681f3Smrg /* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */ 38997ec681f3Smrg LLVMValueRef active, tmp1, tmp2; 39007ec681f3Smrg LLVMValueRef tid = ac_get_thread_id(ctx); 39017ec681f3Smrg 39027ec681f3Smrg tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false); 39037ec681f3Smrg 39047ec681f3Smrg tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false); 39057ec681f3Smrg 39067ec681f3Smrg if (maxprefix > 32) { 39077ec681f3Smrg active = 39087ec681f3Smrg LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, false), ""); 39097ec681f3Smrg 39107ec681f3Smrg tmp2 = LLVMBuildSelect(ctx->builder, active, 39117ec681f3Smrg ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, false)), 39127ec681f3Smrg tmp2, ""); 39137ec681f3Smrg 39147ec681f3Smrg active = LLVMBuildOr( 39157ec681f3Smrg 
ctx->builder, active, 39167ec681f3Smrg LLVMBuildICmp(ctx->builder, LLVMIntEQ, 39177ec681f3Smrg LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, false), ""), 39187ec681f3Smrg LLVMConstInt(ctx->i32, 0x10, false), ""), 39197ec681f3Smrg ""); 39207ec681f3Smrg return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); 39217ec681f3Smrg } else if (maxprefix > 16) { 39227ec681f3Smrg active = 39237ec681f3Smrg LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 16, false), ""); 39247ec681f3Smrg 39257ec681f3Smrg return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); 39267ec681f3Smrg } 39277ec681f3Smrg } else if (ctx->chip_class >= GFX8) { 39287ec681f3Smrg return ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false); 39297ec681f3Smrg } 39307ec681f3Smrg 39317ec681f3Smrg /* wavefront shift_right by 1 on SI/CI */ 39327ec681f3Smrg LLVMValueRef active, tmp1, tmp2; 39337ec681f3Smrg LLVMValueRef tid = ac_get_thread_id(ctx); 39347ec681f3Smrg tmp1 = ac_build_ds_swizzle(ctx, src, (1 << 15) | dpp_quad_perm(0, 0, 1, 2)); 39357ec681f3Smrg tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x18, 0x03, 0x00)); 39367ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, 39377ec681f3Smrg LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x7, 0), ""), 39387ec681f3Smrg LLVMConstInt(ctx->i32, 0x4, 0), ""); 39397ec681f3Smrg tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); 39407ec681f3Smrg tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x10, 0x07, 0x00)); 39417ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, 39427ec681f3Smrg LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0xf, 0), ""), 39437ec681f3Smrg LLVMConstInt(ctx->i32, 0x8, 0), ""); 39447ec681f3Smrg tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); 39457ec681f3Smrg tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x00, 0x0f, 0x00)); 39467ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, 39477ec681f3Smrg 
LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, 0), ""), 39487ec681f3Smrg LLVMConstInt(ctx->i32, 0x10, 0), ""); 39497ec681f3Smrg tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); 39507ec681f3Smrg tmp2 = ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, 0)); 39517ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, 0), ""); 39527ec681f3Smrg tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, ""); 39537ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 0, 0), ""); 39547ec681f3Smrg return LLVMBuildSelect(ctx->builder, active, identity, tmp1, ""); 39557ec681f3Smrg} 39567ec681f3Smrg 39577ec681f3Smrg/** 39587ec681f3Smrg * \param maxprefix specifies that the result only needs to be correct for a 39597ec681f3Smrg * prefix of this many threads 39607ec681f3Smrg */ 39617ec681f3Smrgstatic LLVMValueRef ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, 39627ec681f3Smrg LLVMValueRef identity, unsigned maxprefix, bool inclusive) 39637ec681f3Smrg{ 39647ec681f3Smrg LLVMValueRef result, tmp; 39657ec681f3Smrg 39667ec681f3Smrg if (!inclusive) 39677ec681f3Smrg src = ac_wavefront_shift_right_1(ctx, src, identity, maxprefix); 39687ec681f3Smrg 39697ec681f3Smrg result = src; 39707ec681f3Smrg 39717ec681f3Smrg if (ctx->chip_class <= GFX7) { 39727ec681f3Smrg assert(maxprefix == 64); 39737ec681f3Smrg LLVMValueRef tid = ac_get_thread_id(ctx); 39747ec681f3Smrg LLVMValueRef active; 39757ec681f3Smrg tmp = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x1e, 0x00, 0x00)); 39767ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntNE, 39777ec681f3Smrg LLVMBuildAnd(ctx->builder, tid, ctx->i32_1, ""), ctx->i32_0, ""); 39787ec681f3Smrg tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); 39797ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 39807ec681f3Smrg tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1c, 0x01, 0x00)); 
39817ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntNE, 39827ec681f3Smrg LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 2, 0), ""), 39837ec681f3Smrg ctx->i32_0, ""); 39847ec681f3Smrg tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); 39857ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 39867ec681f3Smrg tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x18, 0x03, 0x00)); 39877ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntNE, 39887ec681f3Smrg LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 4, 0), ""), 39897ec681f3Smrg ctx->i32_0, ""); 39907ec681f3Smrg tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); 39917ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 39927ec681f3Smrg tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x10, 0x07, 0x00)); 39937ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntNE, 39947ec681f3Smrg LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 8, 0), ""), 39957ec681f3Smrg ctx->i32_0, ""); 39967ec681f3Smrg tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); 39977ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 39987ec681f3Smrg tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x00, 0x0f, 0x00)); 39997ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntNE, 40007ec681f3Smrg LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, 0), ""), 40017ec681f3Smrg ctx->i32_0, ""); 40027ec681f3Smrg tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); 40037ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 40047ec681f3Smrg tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, 0)); 40057ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntNE, 40067ec681f3Smrg LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 32, 0), ""), 40077ec681f3Smrg ctx->i32_0, ""); 40087ec681f3Smrg tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); 40097ec681f3Smrg result = ac_build_alu_op(ctx, 
result, tmp, op); 40107ec681f3Smrg return result; 40117ec681f3Smrg } 40127ec681f3Smrg 40137ec681f3Smrg if (maxprefix <= 1) 40147ec681f3Smrg return result; 40157ec681f3Smrg tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false); 40167ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 40177ec681f3Smrg if (maxprefix <= 2) 40187ec681f3Smrg return result; 40197ec681f3Smrg tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false); 40207ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 40217ec681f3Smrg if (maxprefix <= 3) 40227ec681f3Smrg return result; 40237ec681f3Smrg tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false); 40247ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 40257ec681f3Smrg if (maxprefix <= 4) 40267ec681f3Smrg return result; 40277ec681f3Smrg tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false); 40287ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 40297ec681f3Smrg if (maxprefix <= 8) 40307ec681f3Smrg return result; 40317ec681f3Smrg tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false); 40327ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 40337ec681f3Smrg if (maxprefix <= 16) 40347ec681f3Smrg return result; 40357ec681f3Smrg 40367ec681f3Smrg if (ctx->chip_class >= GFX10) { 40377ec681f3Smrg LLVMValueRef tid = ac_get_thread_id(ctx); 40387ec681f3Smrg LLVMValueRef active; 40397ec681f3Smrg 40407ec681f3Smrg tmp = ac_build_permlane16(ctx, result, ~(uint64_t)0, true, false); 40417ec681f3Smrg 40427ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntNE, 40437ec681f3Smrg LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, false), ""), 40447ec681f3Smrg ctx->i32_0, ""); 40457ec681f3Smrg 40467ec681f3Smrg tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); 40477ec681f3Smrg 40487ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 40497ec681f3Smrg 40507ec681f3Smrg if (maxprefix <= 32) 40517ec681f3Smrg 
return result; 40527ec681f3Smrg 40537ec681f3Smrg tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false)); 40547ec681f3Smrg 40557ec681f3Smrg active = LLVMBuildICmp(ctx->builder, LLVMIntUGE, tid, LLVMConstInt(ctx->i32, 32, false), ""); 40567ec681f3Smrg 40577ec681f3Smrg tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, ""); 40587ec681f3Smrg 40597ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 40607ec681f3Smrg return result; 40617ec681f3Smrg } 40627ec681f3Smrg 40637ec681f3Smrg tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); 40647ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 40657ec681f3Smrg if (maxprefix <= 32) 40667ec681f3Smrg return result; 40677ec681f3Smrg tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); 40687ec681f3Smrg result = ac_build_alu_op(ctx, result, tmp, op); 40697ec681f3Smrg return result; 40707ec681f3Smrg} 40717ec681f3Smrg 40727ec681f3SmrgLLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) 40737ec681f3Smrg{ 40747ec681f3Smrg LLVMValueRef result; 40757ec681f3Smrg 40767ec681f3Smrg if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { 40777ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 40787ec681f3Smrg src = LLVMBuildZExt(builder, src, ctx->i32, ""); 40797ec681f3Smrg result = ac_build_ballot(ctx, src); 40807ec681f3Smrg result = ac_build_mbcnt(ctx, result); 40817ec681f3Smrg result = LLVMBuildAdd(builder, result, src, ""); 40827ec681f3Smrg return result; 40837ec681f3Smrg } 40847ec681f3Smrg 40857ec681f3Smrg ac_build_optimization_barrier(ctx, &src, false); 40867ec681f3Smrg 40877ec681f3Smrg LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); 40887ec681f3Smrg result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), 40897ec681f3Smrg LLVMTypeOf(identity), ""); 40907ec681f3Smrg result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true); 
40917ec681f3Smrg 40927ec681f3Smrg return ac_build_wwm(ctx, result); 40937ec681f3Smrg} 40947ec681f3Smrg 40957ec681f3SmrgLLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) 40967ec681f3Smrg{ 40977ec681f3Smrg LLVMValueRef result; 40987ec681f3Smrg 40997ec681f3Smrg if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { 41007ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 41017ec681f3Smrg src = LLVMBuildZExt(builder, src, ctx->i32, ""); 41027ec681f3Smrg result = ac_build_ballot(ctx, src); 41037ec681f3Smrg result = ac_build_mbcnt(ctx, result); 41047ec681f3Smrg return result; 41057ec681f3Smrg } 41067ec681f3Smrg 41077ec681f3Smrg ac_build_optimization_barrier(ctx, &src, false); 41087ec681f3Smrg 41097ec681f3Smrg LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); 41107ec681f3Smrg result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), 41117ec681f3Smrg LLVMTypeOf(identity), ""); 41127ec681f3Smrg result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false); 41137ec681f3Smrg 41147ec681f3Smrg return ac_build_wwm(ctx, result); 41157ec681f3Smrg} 41167ec681f3Smrg 41177ec681f3SmrgLLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, 41187ec681f3Smrg unsigned cluster_size) 41197ec681f3Smrg{ 41207ec681f3Smrg if (cluster_size == 1) 41217ec681f3Smrg return src; 41227ec681f3Smrg ac_build_optimization_barrier(ctx, &src, false); 41237ec681f3Smrg LLVMValueRef result, swap; 41247ec681f3Smrg LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); 41257ec681f3Smrg result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), 41267ec681f3Smrg LLVMTypeOf(identity), ""); 41277ec681f3Smrg swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2); 41287ec681f3Smrg result = ac_build_alu_op(ctx, result, swap, op); 41297ec681f3Smrg if (cluster_size == 2) 41307ec681f3Smrg return ac_build_wwm(ctx, 
result); 41317ec681f3Smrg 41327ec681f3Smrg swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1); 41337ec681f3Smrg result = ac_build_alu_op(ctx, result, swap, op); 41347ec681f3Smrg if (cluster_size == 4) 41357ec681f3Smrg return ac_build_wwm(ctx, result); 41367ec681f3Smrg 41377ec681f3Smrg if (ctx->chip_class >= GFX8) 41387ec681f3Smrg swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false); 41397ec681f3Smrg else 41407ec681f3Smrg swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04)); 41417ec681f3Smrg result = ac_build_alu_op(ctx, result, swap, op); 41427ec681f3Smrg if (cluster_size == 8) 41437ec681f3Smrg return ac_build_wwm(ctx, result); 41447ec681f3Smrg 41457ec681f3Smrg if (ctx->chip_class >= GFX8) 41467ec681f3Smrg swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false); 41477ec681f3Smrg else 41487ec681f3Smrg swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08)); 41497ec681f3Smrg result = ac_build_alu_op(ctx, result, swap, op); 41507ec681f3Smrg if (cluster_size == 16) 41517ec681f3Smrg return ac_build_wwm(ctx, result); 41527ec681f3Smrg 41537ec681f3Smrg if (ctx->chip_class >= GFX10) 41547ec681f3Smrg swap = ac_build_permlane16(ctx, result, 0, true, false); 41557ec681f3Smrg else if (ctx->chip_class >= GFX8 && cluster_size != 32) 41567ec681f3Smrg swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); 41577ec681f3Smrg else 41587ec681f3Smrg swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10)); 41597ec681f3Smrg result = ac_build_alu_op(ctx, result, swap, op); 41607ec681f3Smrg if (cluster_size == 32) 41617ec681f3Smrg return ac_build_wwm(ctx, result); 41627ec681f3Smrg 41637ec681f3Smrg if (ctx->chip_class >= GFX8) { 41647ec681f3Smrg if (ctx->wave_size == 64) { 41657ec681f3Smrg if (ctx->chip_class >= GFX10) 41667ec681f3Smrg swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false)); 41677ec681f3Smrg else 41687ec681f3Smrg swap = 
ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); 41697ec681f3Smrg result = ac_build_alu_op(ctx, result, swap, op); 41707ec681f3Smrg result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0)); 41717ec681f3Smrg } 41727ec681f3Smrg 41737ec681f3Smrg return ac_build_wwm(ctx, result); 41747ec681f3Smrg } else { 41757ec681f3Smrg swap = ac_build_readlane(ctx, result, ctx->i32_0); 41767ec681f3Smrg result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0)); 41777ec681f3Smrg result = ac_build_alu_op(ctx, result, swap, op); 41787ec681f3Smrg return ac_build_wwm(ctx, result); 41797ec681f3Smrg } 41807ec681f3Smrg} 41817ec681f3Smrg 41827ec681f3Smrg/** 41837ec681f3Smrg * "Top half" of a scan that reduces per-wave values across an entire 41847ec681f3Smrg * workgroup. 41857ec681f3Smrg * 41867ec681f3Smrg * The source value must be present in the highest lane of the wave, and the 41877ec681f3Smrg * highest lane must be live. 41887ec681f3Smrg */ 41897ec681f3Smrgvoid ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 41907ec681f3Smrg{ 41917ec681f3Smrg if (ws->maxwaves <= 1) 41927ec681f3Smrg return; 41937ec681f3Smrg 41947ec681f3Smrg const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false); 41957ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 41967ec681f3Smrg LLVMValueRef tid = ac_get_thread_id(ctx); 41977ec681f3Smrg LLVMValueRef tmp; 41987ec681f3Smrg 41997ec681f3Smrg tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, last_lane, ""); 42007ec681f3Smrg ac_build_ifcc(ctx, tmp, 1000); 42017ec681f3Smrg LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, "")); 42027ec681f3Smrg ac_build_endif(ctx, 1000); 42037ec681f3Smrg} 42047ec681f3Smrg 42057ec681f3Smrg/** 42067ec681f3Smrg * "Bottom half" of a scan that reduces per-wave values across an entire 42077ec681f3Smrg * workgroup. 
42087ec681f3Smrg * 42097ec681f3Smrg * The caller must place a barrier between the top and bottom halves. 42107ec681f3Smrg */ 42117ec681f3Smrgvoid ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 42127ec681f3Smrg{ 42137ec681f3Smrg const LLVMTypeRef type = LLVMTypeOf(ws->src); 42147ec681f3Smrg const LLVMValueRef identity = get_reduction_identity(ctx, ws->op, ac_get_type_size(type)); 42157ec681f3Smrg 42167ec681f3Smrg if (ws->maxwaves <= 1) { 42177ec681f3Smrg ws->result_reduce = ws->src; 42187ec681f3Smrg ws->result_inclusive = ws->src; 42197ec681f3Smrg ws->result_exclusive = identity; 42207ec681f3Smrg return; 42217ec681f3Smrg } 42227ec681f3Smrg assert(ws->maxwaves <= 32); 42237ec681f3Smrg 42247ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 42257ec681f3Smrg LLVMValueRef tid = ac_get_thread_id(ctx); 42267ec681f3Smrg LLVMBasicBlockRef bbs[2]; 42277ec681f3Smrg LLVMValueRef phivalues_scan[2]; 42287ec681f3Smrg LLVMValueRef tmp, tmp2; 42297ec681f3Smrg 42307ec681f3Smrg bbs[0] = LLVMGetInsertBlock(builder); 42317ec681f3Smrg phivalues_scan[0] = LLVMGetUndef(type); 42327ec681f3Smrg 42337ec681f3Smrg if (ws->enable_reduce) 42347ec681f3Smrg tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, ""); 42357ec681f3Smrg else if (ws->enable_inclusive) 42367ec681f3Smrg tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, ""); 42377ec681f3Smrg else 42387ec681f3Smrg tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, ""); 42397ec681f3Smrg ac_build_ifcc(ctx, tmp, 1001); 42407ec681f3Smrg { 42417ec681f3Smrg tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), ""); 42427ec681f3Smrg 42437ec681f3Smrg ac_build_optimization_barrier(ctx, &tmp, false); 42447ec681f3Smrg 42457ec681f3Smrg bbs[1] = LLVMGetInsertBlock(builder); 42467ec681f3Smrg phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true); 42477ec681f3Smrg } 42487ec681f3Smrg ac_build_endif(ctx, 1001); 42497ec681f3Smrg 42507ec681f3Smrg const 
LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs); 42517ec681f3Smrg 42527ec681f3Smrg if (ws->enable_reduce) { 42537ec681f3Smrg tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, ""); 42547ec681f3Smrg ws->result_reduce = ac_build_readlane(ctx, scan, tmp); 42557ec681f3Smrg } 42567ec681f3Smrg if (ws->enable_inclusive) 42577ec681f3Smrg ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx); 42587ec681f3Smrg if (ws->enable_exclusive) { 42597ec681f3Smrg tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, ""); 42607ec681f3Smrg tmp = ac_build_readlane(ctx, scan, tmp); 42617ec681f3Smrg tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, ""); 42627ec681f3Smrg ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, ""); 42637ec681f3Smrg } 42647ec681f3Smrg} 42657ec681f3Smrg 42667ec681f3Smrg/** 42677ec681f3Smrg * Inclusive scan of a per-wave value across an entire workgroup. 42687ec681f3Smrg * 42697ec681f3Smrg * This implies an s_barrier instruction. 42707ec681f3Smrg * 42717ec681f3Smrg * Unlike ac_build_inclusive_scan, the caller \em must ensure that all threads 42727ec681f3Smrg * of the workgroup are live. (This requirement cannot easily be relaxed in a 42737ec681f3Smrg * useful manner because of the barrier in the algorithm.) 42747ec681f3Smrg */ 42757ec681f3Smrgvoid ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 42767ec681f3Smrg{ 42777ec681f3Smrg ac_build_wg_wavescan_top(ctx, ws); 42787ec681f3Smrg ac_build_s_barrier(ctx); 42797ec681f3Smrg ac_build_wg_wavescan_bottom(ctx, ws); 42807ec681f3Smrg} 42817ec681f3Smrg 42827ec681f3Smrg/** 42837ec681f3Smrg * "Top half" of a scan that reduces per-thread values across an entire 42847ec681f3Smrg * workgroup. 42857ec681f3Smrg * 42867ec681f3Smrg * All lanes must be active when this code runs. 
42877ec681f3Smrg */ 42887ec681f3Smrgvoid ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 42897ec681f3Smrg{ 42907ec681f3Smrg if (ws->enable_exclusive) { 42917ec681f3Smrg ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op); 42927ec681f3Smrg if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd) 42937ec681f3Smrg ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, ""); 42947ec681f3Smrg ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op); 42957ec681f3Smrg } else { 42967ec681f3Smrg ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op); 42977ec681f3Smrg } 42987ec681f3Smrg 42997ec681f3Smrg bool enable_inclusive = ws->enable_inclusive; 43007ec681f3Smrg bool enable_exclusive = ws->enable_exclusive; 43017ec681f3Smrg ws->enable_inclusive = false; 43027ec681f3Smrg ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; 43037ec681f3Smrg ac_build_wg_wavescan_top(ctx, ws); 43047ec681f3Smrg ws->enable_inclusive = enable_inclusive; 43057ec681f3Smrg ws->enable_exclusive = enable_exclusive; 43067ec681f3Smrg} 43077ec681f3Smrg 43087ec681f3Smrg/** 43097ec681f3Smrg * "Bottom half" of a scan that reduces per-thread values across an entire 43107ec681f3Smrg * workgroup. 43117ec681f3Smrg * 43127ec681f3Smrg * The caller must place a barrier between the top and bottom halves. 
43137ec681f3Smrg */ 43147ec681f3Smrgvoid ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 43157ec681f3Smrg{ 43167ec681f3Smrg bool enable_inclusive = ws->enable_inclusive; 43177ec681f3Smrg bool enable_exclusive = ws->enable_exclusive; 43187ec681f3Smrg ws->enable_inclusive = false; 43197ec681f3Smrg ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; 43207ec681f3Smrg ac_build_wg_wavescan_bottom(ctx, ws); 43217ec681f3Smrg ws->enable_inclusive = enable_inclusive; 43227ec681f3Smrg ws->enable_exclusive = enable_exclusive; 43237ec681f3Smrg 43247ec681f3Smrg /* ws->result_reduce is already the correct value */ 43257ec681f3Smrg if (ws->enable_inclusive) 43267ec681f3Smrg ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op); 43277ec681f3Smrg if (ws->enable_exclusive) 43287ec681f3Smrg ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op); 43297ec681f3Smrg} 43307ec681f3Smrg 43317ec681f3Smrg/** 43327ec681f3Smrg * A scan that reduces per-thread values across an entire workgroup. 43337ec681f3Smrg * 43347ec681f3Smrg * The caller must ensure that all lanes are active when this code runs 43357ec681f3Smrg * (WWM is insufficient!), because there is an implied barrier. 
43367ec681f3Smrg */ 43377ec681f3Smrgvoid ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 43387ec681f3Smrg{ 43397ec681f3Smrg ac_build_wg_scan_top(ctx, ws); 43407ec681f3Smrg ac_build_s_barrier(ctx); 43417ec681f3Smrg ac_build_wg_scan_bottom(ctx, ws); 43427ec681f3Smrg} 43437ec681f3Smrg 43447ec681f3SmrgLLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, 43457ec681f3Smrg unsigned lane1, unsigned lane2, unsigned lane3) 43467ec681f3Smrg{ 43477ec681f3Smrg unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3); 43487ec681f3Smrg if (ctx->chip_class >= GFX8) { 43497ec681f3Smrg return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false); 43507ec681f3Smrg } else { 43517ec681f3Smrg return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask); 43527ec681f3Smrg } 43537ec681f3Smrg} 43547ec681f3Smrg 43557ec681f3SmrgLLVMValueRef ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index) 43567ec681f3Smrg{ 43577ec681f3Smrg LLVMTypeRef type = LLVMTypeOf(src); 43587ec681f3Smrg LLVMValueRef result; 43597ec681f3Smrg 43607ec681f3Smrg index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); 43617ec681f3Smrg src = LLVMBuildZExt(ctx->builder, src, ctx->i32, ""); 43627ec681f3Smrg 43637ec681f3Smrg result = 43647ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, (LLVMValueRef[]){index, src}, 2, 43657ec681f3Smrg AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); 43667ec681f3Smrg return LLVMBuildTrunc(ctx->builder, result, type, ""); 43677ec681f3Smrg} 43687ec681f3Smrg 43697ec681f3SmrgLLVMValueRef ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) 43707ec681f3Smrg{ 43717ec681f3Smrg LLVMTypeRef type; 43727ec681f3Smrg char *intr; 43737ec681f3Smrg 43747ec681f3Smrg if (bitsize == 16) { 43757ec681f3Smrg intr = "llvm.amdgcn.frexp.exp.i16.f16"; 43767ec681f3Smrg type = ctx->i16; 43777ec681f3Smrg } else if (bitsize == 32) { 43787ec681f3Smrg intr = 
"llvm.amdgcn.frexp.exp.i32.f32"; 43797ec681f3Smrg type = ctx->i32; 43807ec681f3Smrg } else { 43817ec681f3Smrg intr = "llvm.amdgcn.frexp.exp.i32.f64"; 43827ec681f3Smrg type = ctx->i32; 43837ec681f3Smrg } 43847ec681f3Smrg 43857ec681f3Smrg LLVMValueRef params[] = { 43867ec681f3Smrg src0, 43877ec681f3Smrg }; 43887ec681f3Smrg return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE); 43897ec681f3Smrg} 43907ec681f3SmrgLLVMValueRef ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) 43917ec681f3Smrg{ 43927ec681f3Smrg LLVMTypeRef type; 43937ec681f3Smrg char *intr; 43947ec681f3Smrg 43957ec681f3Smrg if (bitsize == 16) { 43967ec681f3Smrg intr = "llvm.amdgcn.frexp.mant.f16"; 43977ec681f3Smrg type = ctx->f16; 43987ec681f3Smrg } else if (bitsize == 32) { 43997ec681f3Smrg intr = "llvm.amdgcn.frexp.mant.f32"; 44007ec681f3Smrg type = ctx->f32; 44017ec681f3Smrg } else { 44027ec681f3Smrg intr = "llvm.amdgcn.frexp.mant.f64"; 44037ec681f3Smrg type = ctx->f64; 44047ec681f3Smrg } 44057ec681f3Smrg 44067ec681f3Smrg LLVMValueRef params[] = { 44077ec681f3Smrg src0, 44087ec681f3Smrg }; 44097ec681f3Smrg return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE); 44107ec681f3Smrg} 44117ec681f3Smrg 44127ec681f3SmrgLLVMValueRef ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) 44137ec681f3Smrg{ 44147ec681f3Smrg LLVMTypeRef type; 44157ec681f3Smrg char *intr; 44167ec681f3Smrg 44177ec681f3Smrg if (bitsize == 16) { 44187ec681f3Smrg intr = "llvm.canonicalize.f16"; 44197ec681f3Smrg type = ctx->f16; 44207ec681f3Smrg } else if (bitsize == 32) { 44217ec681f3Smrg intr = "llvm.canonicalize.f32"; 44227ec681f3Smrg type = ctx->f32; 44237ec681f3Smrg } else { 44247ec681f3Smrg intr = "llvm.canonicalize.f64"; 44257ec681f3Smrg type = ctx->f64; 44267ec681f3Smrg } 44277ec681f3Smrg 44287ec681f3Smrg LLVMValueRef params[] = { 44297ec681f3Smrg src0, 44307ec681f3Smrg }; 44317ec681f3Smrg return 
ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE); 44327ec681f3Smrg} 44337ec681f3Smrg 44347ec681f3Smrg/* 44357ec681f3Smrg * this takes an I,J coordinate pair, 44367ec681f3Smrg * and works out the X and Y derivatives. 44377ec681f3Smrg * it returns DDX(I), DDX(J), DDY(I), DDY(J). 44387ec681f3Smrg */ 44397ec681f3SmrgLLVMValueRef ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij) 44407ec681f3Smrg{ 44417ec681f3Smrg LLVMValueRef result[4], a; 44427ec681f3Smrg unsigned i; 44437ec681f3Smrg 44447ec681f3Smrg for (i = 0; i < 2; i++) { 44457ec681f3Smrg a = LLVMBuildExtractElement(ctx->builder, interp_ij, LLVMConstInt(ctx->i32, i, false), ""); 44467ec681f3Smrg result[i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, a); 44477ec681f3Smrg result[2 + i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, a); 44487ec681f3Smrg } 44497ec681f3Smrg return ac_build_gather_values(ctx, result, 4); 44507ec681f3Smrg} 44517ec681f3Smrg 44527ec681f3SmrgLLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx) 44537ec681f3Smrg{ 44547ec681f3Smrg LLVMValueRef result; 44557ec681f3Smrg 44567ec681f3Smrg if (LLVM_VERSION_MAJOR >= 13) { 44577ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.amdgcn.live.mask", ctx->i1, NULL, 0, 44587ec681f3Smrg AC_FUNC_ATTR_READONLY | AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); 44597ec681f3Smrg } else { 44607ec681f3Smrg result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0, 44617ec681f3Smrg AC_FUNC_ATTR_READNONE); 44627ec681f3Smrg } 44637ec681f3Smrg return LLVMBuildNot(ctx->builder, result, ""); 44647ec681f3Smrg} 44657ec681f3Smrg 44667ec681f3SmrgLLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx) 44677ec681f3Smrg{ 44687ec681f3Smrg if (!ctx->postponed_kill) 44697ec681f3Smrg return ac_build_load_helper_invocation(ctx); 44707ec681f3Smrg 44717ec681f3Smrg /* postponed_kill should be NULL on LLVM 13+ */ 44727ec681f3Smrg assert(LLVM_VERSION_MAJOR < 13); 44737ec681f3Smrg 44747ec681f3Smrg /* 
!(exact && postponed) */ 44757ec681f3Smrg LLVMValueRef exact = 44767ec681f3Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0, AC_FUNC_ATTR_READNONE); 44777ec681f3Smrg 44787ec681f3Smrg LLVMValueRef postponed = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, ""); 44797ec681f3Smrg return LLVMBuildNot(ctx->builder, LLVMBuildAnd(ctx->builder, exact, postponed, ""), ""); 44807ec681f3Smrg} 44817ec681f3Smrg 44827ec681f3SmrgLLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args, 44837ec681f3Smrg unsigned num_args) 44847ec681f3Smrg{ 44857ec681f3Smrg LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, ""); 44867ec681f3Smrg LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func)); 44877ec681f3Smrg return ret; 44887ec681f3Smrg} 44897ec681f3Smrg 44907ec681f3Smrgvoid ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil, 44917ec681f3Smrg LLVMValueRef samplemask, struct ac_export_args *args) 44927ec681f3Smrg{ 44937ec681f3Smrg unsigned mask = 0; 44947ec681f3Smrg unsigned format = ac_get_spi_shader_z_format(depth != NULL, stencil != NULL, samplemask != NULL); 44957ec681f3Smrg 44967ec681f3Smrg assert(depth || stencil || samplemask); 44977ec681f3Smrg 44987ec681f3Smrg memset(args, 0, sizeof(*args)); 44997ec681f3Smrg 45007ec681f3Smrg args->valid_mask = 1; /* whether the EXEC mask is valid */ 45017ec681f3Smrg args->done = 1; /* DONE bit */ 45027ec681f3Smrg 45037ec681f3Smrg /* Specify the target we are exporting */ 45047ec681f3Smrg args->target = V_008DFC_SQ_EXP_MRTZ; 45057ec681f3Smrg 45067ec681f3Smrg args->compr = 0; /* COMP flag */ 45077ec681f3Smrg args->out[0] = LLVMGetUndef(ctx->f32); /* R, depth */ 45087ec681f3Smrg args->out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */ 45097ec681f3Smrg args->out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */ 45107ec681f3Smrg args->out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */ 
45117ec681f3Smrg 45127ec681f3Smrg if (format == V_028710_SPI_SHADER_UINT16_ABGR) { 45137ec681f3Smrg assert(!depth); 45147ec681f3Smrg args->compr = 1; /* COMPR flag */ 45157ec681f3Smrg 45167ec681f3Smrg if (stencil) { 45177ec681f3Smrg /* Stencil should be in X[23:16]. */ 45187ec681f3Smrg stencil = ac_to_integer(ctx, stencil); 45197ec681f3Smrg stencil = LLVMBuildShl(ctx->builder, stencil, LLVMConstInt(ctx->i32, 16, 0), ""); 45207ec681f3Smrg args->out[0] = ac_to_float(ctx, stencil); 45217ec681f3Smrg mask |= 0x3; 45227ec681f3Smrg } 45237ec681f3Smrg if (samplemask) { 45247ec681f3Smrg /* SampleMask should be in Y[15:0]. */ 45257ec681f3Smrg args->out[1] = samplemask; 45267ec681f3Smrg mask |= 0xc; 45277ec681f3Smrg } 45287ec681f3Smrg } else { 45297ec681f3Smrg if (depth) { 45307ec681f3Smrg args->out[0] = depth; 45317ec681f3Smrg mask |= 0x1; 45327ec681f3Smrg } 45337ec681f3Smrg if (stencil) { 45347ec681f3Smrg args->out[1] = stencil; 45357ec681f3Smrg mask |= 0x2; 45367ec681f3Smrg } 45377ec681f3Smrg if (samplemask) { 45387ec681f3Smrg args->out[2] = samplemask; 45397ec681f3Smrg mask |= 0x4; 45407ec681f3Smrg } 45417ec681f3Smrg } 45427ec681f3Smrg 45437ec681f3Smrg /* GFX6 (except OLAND and HAINAN) has a bug that it only looks 45447ec681f3Smrg * at the X writemask component. */ 45457ec681f3Smrg if (ctx->chip_class == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN) 45467ec681f3Smrg mask |= 0x1; 45477ec681f3Smrg 45487ec681f3Smrg /* Specify which components to enable */ 45497ec681f3Smrg args->enabled_channels = mask; 45507ec681f3Smrg} 45517ec681f3Smrg 45527ec681f3Smrg/* Send GS Alloc Req message from the first wave of the group to SPI. 
45537ec681f3Smrg * Message payload is: 45547ec681f3Smrg * - bits 0..10: vertices in group 45557ec681f3Smrg * - bits 12..22: primitives in group 45567ec681f3Smrg */ 45577ec681f3Smrgvoid ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id, 45587ec681f3Smrg LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt) 45597ec681f3Smrg{ 45607ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 45617ec681f3Smrg LLVMValueRef tmp; 45627ec681f3Smrg bool export_dummy_prim = false; 45637ec681f3Smrg 45647ec681f3Smrg /* HW workaround for a GPU hang with 100% culling. 45657ec681f3Smrg * We always have to export at least 1 primitive. 45667ec681f3Smrg * Export a degenerate triangle using vertex 0 for all 3 vertices. 45677ec681f3Smrg */ 45687ec681f3Smrg if (prim_cnt == ctx->i32_0 && ctx->chip_class == GFX10) { 45697ec681f3Smrg assert(vtx_cnt == ctx->i32_0); 45707ec681f3Smrg prim_cnt = ctx->i32_1; 45717ec681f3Smrg vtx_cnt = ctx->i32_1; 45727ec681f3Smrg export_dummy_prim = true; 45737ec681f3Smrg } 45747ec681f3Smrg 45757ec681f3Smrg ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020); 45767ec681f3Smrg 45777ec681f3Smrg tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->i32, 12, false), ""); 45787ec681f3Smrg tmp = LLVMBuildOr(builder, tmp, vtx_cnt, ""); 45797ec681f3Smrg ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp); 45807ec681f3Smrg 45817ec681f3Smrg if (export_dummy_prim) { 45827ec681f3Smrg struct ac_ngg_prim prim = {0}; 45837ec681f3Smrg /* The vertex indices are 0,0,0. */ 45847ec681f3Smrg prim.passthrough = ctx->i32_0; 45857ec681f3Smrg 45867ec681f3Smrg struct ac_export_args pos = {0}; 45877ec681f3Smrg /* The hw culls primitives with NaN. 
*/ 45887ec681f3Smrg pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = LLVMConstReal(ctx->f32, NAN); 45897ec681f3Smrg pos.target = V_008DFC_SQ_EXP_POS; 45907ec681f3Smrg pos.enabled_channels = 0xf; 45917ec681f3Smrg pos.done = true; 45927ec681f3Smrg 45937ec681f3Smrg ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx), ctx->i32_0, ""), 45947ec681f3Smrg 5021); 45957ec681f3Smrg ac_build_export_prim(ctx, &prim); 45967ec681f3Smrg ac_build_export(ctx, &pos); 45977ec681f3Smrg ac_build_endif(ctx, 5021); 45987ec681f3Smrg } 45997ec681f3Smrg 46007ec681f3Smrg ac_build_endif(ctx, 5020); 46017ec681f3Smrg} 46027ec681f3Smrg 46037ec681f3Smrg 46047ec681f3SmrgLLVMValueRef ac_pack_edgeflags_for_export(struct ac_llvm_context *ctx, 46057ec681f3Smrg const struct ac_shader_args *args) 46067ec681f3Smrg{ 46077ec681f3Smrg /* Use the following trick to extract the edge flags: 46087ec681f3Smrg * extracted = v_and_b32 gs_invocation_id, 0x700 ; get edge flags at bits 8, 9, 10 46097ec681f3Smrg * shifted = v_mul_u32_u24 extracted, 0x80402u ; shift the bits: 8->9, 9->19, 10->29 46107ec681f3Smrg * result = v_and_b32 shifted, 0x20080200 ; remove garbage 46117ec681f3Smrg */ 46127ec681f3Smrg LLVMValueRef tmp = LLVMBuildAnd(ctx->builder, 46137ec681f3Smrg ac_get_arg(ctx, args->gs_invocation_id), 46147ec681f3Smrg LLVMConstInt(ctx->i32, 0x700, 0), ""); 46157ec681f3Smrg tmp = LLVMBuildMul(ctx->builder, tmp, LLVMConstInt(ctx->i32, 0x80402u, 0), ""); 46167ec681f3Smrg return LLVMBuildAnd(ctx->builder, tmp, LLVMConstInt(ctx->i32, 0x20080200, 0), ""); 46177ec681f3Smrg} 46187ec681f3Smrg 46197ec681f3SmrgLLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim) 46207ec681f3Smrg{ 46217ec681f3Smrg /* The prim export format is: 46227ec681f3Smrg * - bits 0..8: index 0 46237ec681f3Smrg * - bit 9: edge flag 0 46247ec681f3Smrg * - bits 10..18: index 1 46257ec681f3Smrg * - bit 19: edge flag 1 46267ec681f3Smrg * - bits 20..28: index 2 46277ec681f3Smrg * - bit 29: 
edge flag 2 46287ec681f3Smrg * - bit 31: null primitive (skip) 46297ec681f3Smrg */ 46307ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 46317ec681f3Smrg LLVMValueRef tmp = LLVMBuildZExt(builder, prim->isnull, ctx->i32, ""); 46327ec681f3Smrg LLVMValueRef result = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 31, false), ""); 46337ec681f3Smrg result = LLVMBuildOr(ctx->builder, result, prim->edgeflags, ""); 46347ec681f3Smrg 46357ec681f3Smrg for (unsigned i = 0; i < prim->num_vertices; ++i) { 46367ec681f3Smrg tmp = LLVMBuildShl(builder, prim->index[i], LLVMConstInt(ctx->i32, 10 * i, false), ""); 46377ec681f3Smrg result = LLVMBuildOr(builder, result, tmp, ""); 46387ec681f3Smrg } 46397ec681f3Smrg return result; 46407ec681f3Smrg} 46417ec681f3Smrg 46427ec681f3Smrgvoid ac_build_export_prim(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim) 46437ec681f3Smrg{ 46447ec681f3Smrg struct ac_export_args args; 46457ec681f3Smrg 46467ec681f3Smrg if (prim->passthrough) { 46477ec681f3Smrg args.out[0] = prim->passthrough; 46487ec681f3Smrg } else { 46497ec681f3Smrg args.out[0] = ac_pack_prim_export(ctx, prim); 46507ec681f3Smrg } 46517ec681f3Smrg 46527ec681f3Smrg args.out[0] = LLVMBuildBitCast(ctx->builder, args.out[0], ctx->f32, ""); 46537ec681f3Smrg args.out[1] = LLVMGetUndef(ctx->f32); 46547ec681f3Smrg args.out[2] = LLVMGetUndef(ctx->f32); 46557ec681f3Smrg args.out[3] = LLVMGetUndef(ctx->f32); 46567ec681f3Smrg 46577ec681f3Smrg args.target = V_008DFC_SQ_EXP_PRIM; 46587ec681f3Smrg args.enabled_channels = 1; 46597ec681f3Smrg args.done = true; 46607ec681f3Smrg args.valid_mask = false; 46617ec681f3Smrg args.compr = false; 46627ec681f3Smrg 46637ec681f3Smrg ac_build_export(ctx, &args); 46647ec681f3Smrg} 46657ec681f3Smrg 46667ec681f3Smrgstatic LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx) 46677ec681f3Smrg{ 46687ec681f3Smrg if (type == AC_ARG_FLOAT) { 46697ec681f3Smrg return size == 1 ? 
ctx->f32 : LLVMVectorType(ctx->f32, size); 46707ec681f3Smrg } else if (type == AC_ARG_INT) { 46717ec681f3Smrg return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size); 46727ec681f3Smrg } else { 46737ec681f3Smrg LLVMTypeRef ptr_type; 46747ec681f3Smrg switch (type) { 46757ec681f3Smrg case AC_ARG_CONST_PTR: 46767ec681f3Smrg ptr_type = ctx->i8; 46777ec681f3Smrg break; 46787ec681f3Smrg case AC_ARG_CONST_FLOAT_PTR: 46797ec681f3Smrg ptr_type = ctx->f32; 46807ec681f3Smrg break; 46817ec681f3Smrg case AC_ARG_CONST_PTR_PTR: 46827ec681f3Smrg ptr_type = ac_array_in_const32_addr_space(ctx->i8); 46837ec681f3Smrg break; 46847ec681f3Smrg case AC_ARG_CONST_DESC_PTR: 46857ec681f3Smrg ptr_type = ctx->v4i32; 46867ec681f3Smrg break; 46877ec681f3Smrg case AC_ARG_CONST_IMAGE_PTR: 46887ec681f3Smrg ptr_type = ctx->v8i32; 46897ec681f3Smrg break; 46907ec681f3Smrg default: 46917ec681f3Smrg unreachable("unknown arg type"); 46927ec681f3Smrg } 46937ec681f3Smrg if (size == 1) { 46947ec681f3Smrg return ac_array_in_const32_addr_space(ptr_type); 46957ec681f3Smrg } else { 46967ec681f3Smrg assert(size == 2); 46977ec681f3Smrg return ac_array_in_const_addr_space(ptr_type); 46987ec681f3Smrg } 46997ec681f3Smrg } 47007ec681f3Smrg} 47017ec681f3Smrg 47027ec681f3SmrgLLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx, 47037ec681f3Smrg enum ac_llvm_calling_convention convention, const char *name, 47047ec681f3Smrg LLVMTypeRef ret_type, LLVMModuleRef module) 47057ec681f3Smrg{ 47067ec681f3Smrg LLVMTypeRef arg_types[AC_MAX_ARGS]; 47077ec681f3Smrg 47087ec681f3Smrg for (unsigned i = 0; i < args->arg_count; i++) { 47097ec681f3Smrg arg_types[i] = arg_llvm_type(args->args[i].type, args->args[i].size, ctx); 47107ec681f3Smrg } 47117ec681f3Smrg 47127ec681f3Smrg LLVMTypeRef main_function_type = LLVMFunctionType(ret_type, arg_types, args->arg_count, 0); 47137ec681f3Smrg 47147ec681f3Smrg LLVMValueRef main_function = LLVMAddFunction(module, name, main_function_type); 47157ec681f3Smrg 
LLVMBasicBlockRef main_function_body = 47167ec681f3Smrg LLVMAppendBasicBlockInContext(ctx->context, main_function, "main_body"); 47177ec681f3Smrg LLVMPositionBuilderAtEnd(ctx->builder, main_function_body); 47187ec681f3Smrg 47197ec681f3Smrg LLVMSetFunctionCallConv(main_function, convention); 47207ec681f3Smrg for (unsigned i = 0; i < args->arg_count; ++i) { 47217ec681f3Smrg LLVMValueRef P = LLVMGetParam(main_function, i); 47227ec681f3Smrg 47237ec681f3Smrg if (args->args[i].file != AC_ARG_SGPR) 47247ec681f3Smrg continue; 47257ec681f3Smrg 47267ec681f3Smrg ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_INREG); 47277ec681f3Smrg 47287ec681f3Smrg if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) { 47297ec681f3Smrg ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); 47307ec681f3Smrg ac_add_attr_dereferenceable(P, UINT64_MAX); 47317ec681f3Smrg ac_add_attr_alignment(P, 4); 47327ec681f3Smrg } 47337ec681f3Smrg } 47347ec681f3Smrg 47357ec681f3Smrg ctx->main_function = main_function; 47367ec681f3Smrg 47377ec681f3Smrg /* Enable denormals for FP16 and FP64: */ 47387ec681f3Smrg LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math", "ieee,ieee"); 47397ec681f3Smrg /* Disable denormals for FP32: */ 47407ec681f3Smrg LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32", 47417ec681f3Smrg "preserve-sign,preserve-sign"); 47427ec681f3Smrg return main_function; 47437ec681f3Smrg} 47447ec681f3Smrg 47457ec681f3Smrgvoid ac_build_s_endpgm(struct ac_llvm_context *ctx) 47467ec681f3Smrg{ 47477ec681f3Smrg LLVMTypeRef calltype = LLVMFunctionType(ctx->voidt, NULL, 0, false); 47487ec681f3Smrg LLVMValueRef code = LLVMConstInlineAsm(calltype, "s_endpgm", "", true, false); 47497ec681f3Smrg LLVMBuildCall(ctx->builder, code, NULL, 0, ""); 47507ec681f3Smrg} 47517ec681f3Smrg 47527ec681f3Smrg/** 47537ec681f3Smrg * Convert triangle strip indices to triangle indices. 
This is used to decompose 47547ec681f3Smrg * triangle strips into triangles. 47557ec681f3Smrg */ 47567ec681f3Smrgvoid ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LLVMValueRef is_odd, 47577ec681f3Smrg LLVMValueRef flatshade_first, 47587ec681f3Smrg LLVMValueRef index[3]) 47597ec681f3Smrg{ 47607ec681f3Smrg LLVMBuilderRef builder = ctx->builder; 47617ec681f3Smrg LLVMValueRef out[3]; 47627ec681f3Smrg 47637ec681f3Smrg /* We need to change the vertex order for odd triangles to get correct 47647ec681f3Smrg * front/back facing by swapping 2 vertex indices, but we also have to 47657ec681f3Smrg * keep the provoking vertex in the same place. 47667ec681f3Smrg * 47677ec681f3Smrg * If the first vertex is provoking, swap index 1 and 2. 47687ec681f3Smrg * If the last vertex is provoking, swap index 0 and 1. 47697ec681f3Smrg */ 47707ec681f3Smrg out[0] = LLVMBuildSelect(builder, flatshade_first, index[0], 47717ec681f3Smrg LLVMBuildSelect(builder, is_odd, index[1], index[0], ""), ""); 47727ec681f3Smrg out[1] = LLVMBuildSelect(builder, flatshade_first, 47737ec681f3Smrg LLVMBuildSelect(builder, is_odd, index[2], index[1], ""), 47747ec681f3Smrg LLVMBuildSelect(builder, is_odd, index[0], index[1], ""), ""); 47757ec681f3Smrg out[2] = LLVMBuildSelect(builder, flatshade_first, 47767ec681f3Smrg LLVMBuildSelect(builder, is_odd, index[1], index[2], ""), index[2], ""); 47777ec681f3Smrg memcpy(index, out, sizeof(out)); 47787ec681f3Smrg} 4779