1b8e80941Smrg/* 2b8e80941Smrg * Copyright 2014 Advanced Micro Devices, Inc. 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the 6b8e80941Smrg * "Software"), to deal in the Software without restriction, including 7b8e80941Smrg * without limitation the rights to use, copy, modify, merge, publish, 8b8e80941Smrg * distribute, sub license, and/or sell copies of the Software, and to 9b8e80941Smrg * permit persons to whom the Software is furnished to do so, subject to 10b8e80941Smrg * the following conditions: 11b8e80941Smrg * 12b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 15b8e80941Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 16b8e80941Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17b8e80941Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 18b8e80941Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 19b8e80941Smrg * 20b8e80941Smrg * The above copyright notice and this permission notice (including the 21b8e80941Smrg * next paragraph) shall be included in all copies or substantial portions 22b8e80941Smrg * of the Software. 23b8e80941Smrg * 24b8e80941Smrg */ 25b8e80941Smrg/* based on pieces from si_pipe.c and radeon_llvm_emit.c */ 26b8e80941Smrg#include "ac_llvm_build.h" 27b8e80941Smrg 28b8e80941Smrg#include <llvm-c/Core.h> 29b8e80941Smrg 30b8e80941Smrg#include "c11/threads.h" 31b8e80941Smrg 32b8e80941Smrg#include <assert.h> 33b8e80941Smrg#include <stdio.h> 34b8e80941Smrg 35b8e80941Smrg#include "ac_llvm_util.h" 36b8e80941Smrg#include "ac_exp_param.h" 37b8e80941Smrg#include "util/bitscan.h" 38b8e80941Smrg#include "util/macros.h" 39b8e80941Smrg#include "util/u_atomic.h" 40b8e80941Smrg#include "util/u_math.h" 41b8e80941Smrg#include "sid.h" 42b8e80941Smrg 43b8e80941Smrg#include "shader_enums.h" 44b8e80941Smrg 45b8e80941Smrg#define AC_LLVM_INITIAL_CF_DEPTH 4 46b8e80941Smrg 47b8e80941Smrg/* Data for if/else/endif and bgnloop/endloop control flow structures. 48b8e80941Smrg */ 49b8e80941Smrgstruct ac_llvm_flow { 50b8e80941Smrg /* Loop exit or next part of if/else/endif. */ 51b8e80941Smrg LLVMBasicBlockRef next_block; 52b8e80941Smrg LLVMBasicBlockRef loop_entry_block; 53b8e80941Smrg}; 54b8e80941Smrg 55b8e80941Smrg/* Initialize module-independent parts of the context. 56b8e80941Smrg * 57b8e80941Smrg * The caller is responsible for initializing ctx::module and ctx::builder. 58b8e80941Smrg */ 59b8e80941Smrgvoid 60b8e80941Smrgac_llvm_context_init(struct ac_llvm_context *ctx, 61b8e80941Smrg enum chip_class chip_class, enum radeon_family family) 62b8e80941Smrg{ 63b8e80941Smrg LLVMValueRef args[1]; 64b8e80941Smrg 65b8e80941Smrg ctx->context = LLVMContextCreate(); 66b8e80941Smrg 67b8e80941Smrg ctx->chip_class = chip_class; 68b8e80941Smrg ctx->family = family; 69b8e80941Smrg ctx->module = NULL; 70b8e80941Smrg ctx->builder = NULL; 71b8e80941Smrg 72b8e80941Smrg ctx->voidt = LLVMVoidTypeInContext(ctx->context); 73b8e80941Smrg ctx->i1 = LLVMInt1TypeInContext(ctx->context); 74b8e80941Smrg ctx->i8 = LLVMInt8TypeInContext(ctx->context); 75b8e80941Smrg ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); 76b8e80941Smrg ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); 77b8e80941Smrg ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); 78b8e80941Smrg ctx->intptr = ctx->i32; 79b8e80941Smrg ctx->f16 = LLVMHalfTypeInContext(ctx->context); 80b8e80941Smrg ctx->f32 = LLVMFloatTypeInContext(ctx->context); 81b8e80941Smrg ctx->f64 = LLVMDoubleTypeInContext(ctx->context); 82b8e80941Smrg ctx->v2i16 = LLVMVectorType(ctx->i16, 2); 83b8e80941Smrg ctx->v2i32 = LLVMVectorType(ctx->i32, 2); 84b8e80941Smrg ctx->v3i32 = LLVMVectorType(ctx->i32, 3); 85b8e80941Smrg ctx->v4i32 = LLVMVectorType(ctx->i32, 4); 86b8e80941Smrg ctx->v2f32 = LLVMVectorType(ctx->f32, 2); 87b8e80941Smrg ctx->v4f32 = LLVMVectorType(ctx->f32, 4); 88b8e80941Smrg ctx->v8i32 = LLVMVectorType(ctx->i32, 8); 89b8e80941Smrg 90b8e80941Smrg ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false); 91b8e80941Smrg ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false); 92b8e80941Smrg ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false); 93b8e80941Smrg ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false); 94b8e80941Smrg ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false); 95b8e80941Smrg ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false); 96b8e80941Smrg ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false); 97b8e80941Smrg ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false); 98b8e80941Smrg ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0); 99b8e80941Smrg ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0); 100b8e80941Smrg ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0); 101b8e80941Smrg ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0); 102b8e80941Smrg ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0); 103b8e80941Smrg ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0); 104b8e80941Smrg 105b8e80941Smrg ctx->i1false = LLVMConstInt(ctx->i1, 0, false); 106b8e80941Smrg ctx->i1true = LLVMConstInt(ctx->i1, 1, false); 107b8e80941Smrg 108b8e80941Smrg ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, 109b8e80941Smrg "range", 5); 110b8e80941Smrg 111b8e80941Smrg ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, 112b8e80941Smrg "invariant.load", 14); 113b8e80941Smrg 114b8e80941Smrg ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6); 115b8e80941Smrg 116b8e80941Smrg args[0] = LLVMConstReal(ctx->f32, 2.5); 117b8e80941Smrg ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1); 118b8e80941Smrg 119b8e80941Smrg ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, 120b8e80941Smrg "amdgpu.uniform", 14); 121b8e80941Smrg 122b8e80941Smrg ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); 123b8e80941Smrg} 124b8e80941Smrg 125b8e80941Smrgvoid 126b8e80941Smrgac_llvm_context_dispose(struct ac_llvm_context *ctx) 127b8e80941Smrg{ 128b8e80941Smrg free(ctx->flow); 129b8e80941Smrg ctx->flow = NULL; 130b8e80941Smrg ctx->flow_depth_max = 0; 131b8e80941Smrg} 132b8e80941Smrg 133b8e80941Smrgint 134b8e80941Smrgac_get_llvm_num_components(LLVMValueRef value) 135b8e80941Smrg{ 136b8e80941Smrg LLVMTypeRef type = LLVMTypeOf(value); 137b8e80941Smrg unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind 138b8e80941Smrg ? LLVMGetVectorSize(type) 139b8e80941Smrg : 1; 140b8e80941Smrg return num_components; 141b8e80941Smrg} 142b8e80941Smrg 143b8e80941SmrgLLVMValueRef 144b8e80941Smrgac_llvm_extract_elem(struct ac_llvm_context *ac, 145b8e80941Smrg LLVMValueRef value, 146b8e80941Smrg int index) 147b8e80941Smrg{ 148b8e80941Smrg if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) { 149b8e80941Smrg assert(index == 0); 150b8e80941Smrg return value; 151b8e80941Smrg } 152b8e80941Smrg 153b8e80941Smrg return LLVMBuildExtractElement(ac->builder, value, 154b8e80941Smrg LLVMConstInt(ac->i32, index, false), ""); 155b8e80941Smrg} 156b8e80941Smrg 157b8e80941Smrgint 158b8e80941Smrgac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type) 159b8e80941Smrg{ 160b8e80941Smrg if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) 161b8e80941Smrg type = LLVMGetElementType(type); 162b8e80941Smrg 163b8e80941Smrg if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind) 164b8e80941Smrg return LLVMGetIntTypeWidth(type); 165b8e80941Smrg 166b8e80941Smrg if (type == ctx->f16) 167b8e80941Smrg return 16; 168b8e80941Smrg if (type == ctx->f32) 169b8e80941Smrg return 32; 170b8e80941Smrg if (type == ctx->f64) 171b8e80941Smrg return 64; 172b8e80941Smrg 173b8e80941Smrg unreachable("Unhandled type kind in get_elem_bits"); 174b8e80941Smrg} 175b8e80941Smrg 176b8e80941Smrgunsigned 177b8e80941Smrgac_get_type_size(LLVMTypeRef type) 178b8e80941Smrg{ 179b8e80941Smrg LLVMTypeKind kind = LLVMGetTypeKind(type); 180b8e80941Smrg 181b8e80941Smrg switch (kind) { 182b8e80941Smrg case LLVMIntegerTypeKind: 183b8e80941Smrg return LLVMGetIntTypeWidth(type) / 8; 184b8e80941Smrg case LLVMHalfTypeKind: 185b8e80941Smrg return 2; 186b8e80941Smrg case LLVMFloatTypeKind: 187b8e80941Smrg return 4; 188b8e80941Smrg case LLVMDoubleTypeKind: 189b8e80941Smrg return 8; 190b8e80941Smrg case LLVMPointerTypeKind: 191b8e80941Smrg if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT) 192b8e80941Smrg return 4; 193b8e80941Smrg return 8; 194b8e80941Smrg case LLVMVectorTypeKind: 195b8e80941Smrg return LLVMGetVectorSize(type) * 196b8e80941Smrg ac_get_type_size(LLVMGetElementType(type)); 197b8e80941Smrg case LLVMArrayTypeKind: 198b8e80941Smrg return LLVMGetArrayLength(type) * 199b8e80941Smrg ac_get_type_size(LLVMGetElementType(type)); 200b8e80941Smrg default: 201b8e80941Smrg assert(0); 202b8e80941Smrg return 0; 203b8e80941Smrg } 204b8e80941Smrg} 205b8e80941Smrg 206b8e80941Smrgstatic LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) 207b8e80941Smrg{ 208b8e80941Smrg if (t == ctx->i8) 209b8e80941Smrg return ctx->i8; 210b8e80941Smrg else if (t == ctx->f16 || t == ctx->i16) 211b8e80941Smrg return ctx->i16; 212b8e80941Smrg else if (t == ctx->f32 || t == ctx->i32) 213b8e80941Smrg return ctx->i32; 214b8e80941Smrg else if (t == ctx->f64 || t == ctx->i64) 215b8e80941Smrg return ctx->i64; 216b8e80941Smrg else 217b8e80941Smrg unreachable("Unhandled integer size"); 218b8e80941Smrg} 219b8e80941Smrg 220b8e80941SmrgLLVMTypeRef 221b8e80941Smrgac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t) 222b8e80941Smrg{ 223b8e80941Smrg if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { 224b8e80941Smrg LLVMTypeRef elem_type = LLVMGetElementType(t); 225b8e80941Smrg return LLVMVectorType(to_integer_type_scalar(ctx, elem_type), 226b8e80941Smrg LLVMGetVectorSize(t)); 227b8e80941Smrg } 228b8e80941Smrg if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) { 229b8e80941Smrg switch (LLVMGetPointerAddressSpace(t)) { 230b8e80941Smrg case AC_ADDR_SPACE_GLOBAL: 231b8e80941Smrg return ctx->i64; 232b8e80941Smrg case AC_ADDR_SPACE_LDS: 233b8e80941Smrg return ctx->i32; 234b8e80941Smrg default: 235b8e80941Smrg unreachable("unhandled address space"); 236b8e80941Smrg } 237b8e80941Smrg } 238b8e80941Smrg return to_integer_type_scalar(ctx, t); 239b8e80941Smrg} 240b8e80941Smrg 241b8e80941SmrgLLVMValueRef 242b8e80941Smrgac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v) 243b8e80941Smrg{ 244b8e80941Smrg LLVMTypeRef type = LLVMTypeOf(v); 245b8e80941Smrg if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) { 246b8e80941Smrg return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), ""); 247b8e80941Smrg } 248b8e80941Smrg return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), ""); 249b8e80941Smrg} 250b8e80941Smrg 251b8e80941SmrgLLVMValueRef 252b8e80941Smrgac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v) 253b8e80941Smrg{ 254b8e80941Smrg LLVMTypeRef type = LLVMTypeOf(v); 255b8e80941Smrg if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) 256b8e80941Smrg return v; 257b8e80941Smrg return ac_to_integer(ctx, v); 258b8e80941Smrg} 259b8e80941Smrg 260b8e80941Smrgstatic LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) 261b8e80941Smrg{ 262b8e80941Smrg if (t == ctx->i8) 263b8e80941Smrg return ctx->i8; 264b8e80941Smrg else if (t == ctx->i16 || t == ctx->f16) 265b8e80941Smrg return ctx->f16; 266b8e80941Smrg else if (t == ctx->i32 || t == ctx->f32) 267b8e80941Smrg return ctx->f32; 268b8e80941Smrg else if (t == ctx->i64 || t == ctx->f64) 269b8e80941Smrg return ctx->f64; 270b8e80941Smrg else 271b8e80941Smrg unreachable("Unhandled float size"); 272b8e80941Smrg} 273b8e80941Smrg 274b8e80941SmrgLLVMTypeRef 275b8e80941Smrgac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t) 276b8e80941Smrg{ 277b8e80941Smrg if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { 278b8e80941Smrg LLVMTypeRef elem_type = LLVMGetElementType(t); 279b8e80941Smrg return LLVMVectorType(to_float_type_scalar(ctx, elem_type), 280b8e80941Smrg LLVMGetVectorSize(t)); 281b8e80941Smrg } 282b8e80941Smrg return to_float_type_scalar(ctx, t); 283b8e80941Smrg} 284b8e80941Smrg 285b8e80941SmrgLLVMValueRef 286b8e80941Smrgac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v) 287b8e80941Smrg{ 288b8e80941Smrg LLVMTypeRef type = LLVMTypeOf(v); 289b8e80941Smrg return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), ""); 290b8e80941Smrg} 291b8e80941Smrg 292b8e80941Smrg 293b8e80941SmrgLLVMValueRef 294b8e80941Smrgac_build_intrinsic(struct ac_llvm_context *ctx, const char *name, 295b8e80941Smrg LLVMTypeRef return_type, LLVMValueRef *params, 296b8e80941Smrg unsigned param_count, unsigned attrib_mask) 297b8e80941Smrg{ 298b8e80941Smrg LLVMValueRef function, call; 299b8e80941Smrg bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY); 300b8e80941Smrg 301b8e80941Smrg function = LLVMGetNamedFunction(ctx->module, name); 302b8e80941Smrg if (!function) { 303b8e80941Smrg LLVMTypeRef param_types[32], function_type; 304b8e80941Smrg unsigned i; 305b8e80941Smrg 306b8e80941Smrg assert(param_count <= 32); 307b8e80941Smrg 308b8e80941Smrg for (i = 0; i < param_count; ++i) { 309b8e80941Smrg assert(params[i]); 310b8e80941Smrg param_types[i] = LLVMTypeOf(params[i]); 311b8e80941Smrg } 312b8e80941Smrg function_type = 313b8e80941Smrg LLVMFunctionType(return_type, param_types, param_count, 0); 314b8e80941Smrg function = LLVMAddFunction(ctx->module, name, function_type); 315b8e80941Smrg 316b8e80941Smrg LLVMSetFunctionCallConv(function, LLVMCCallConv); 317b8e80941Smrg LLVMSetLinkage(function, LLVMExternalLinkage); 318b8e80941Smrg 319b8e80941Smrg if (!set_callsite_attrs) 320b8e80941Smrg ac_add_func_attributes(ctx->context, function, attrib_mask); 321b8e80941Smrg } 322b8e80941Smrg 323b8e80941Smrg call = LLVMBuildCall(ctx->builder, function, params, param_count, ""); 324b8e80941Smrg if (set_callsite_attrs) 325b8e80941Smrg ac_add_func_attributes(ctx->context, call, attrib_mask); 326b8e80941Smrg return call; 327b8e80941Smrg} 328b8e80941Smrg 329b8e80941Smrg/** 330b8e80941Smrg * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with 331b8e80941Smrg * intrinsic names). 332b8e80941Smrg */ 333b8e80941Smrgvoid ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize) 334b8e80941Smrg{ 335b8e80941Smrg LLVMTypeRef elem_type = type; 336b8e80941Smrg 337b8e80941Smrg assert(bufsize >= 8); 338b8e80941Smrg 339b8e80941Smrg if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { 340b8e80941Smrg int ret = snprintf(buf, bufsize, "v%u", 341b8e80941Smrg LLVMGetVectorSize(type)); 342b8e80941Smrg if (ret < 0) { 343b8e80941Smrg char *type_name = LLVMPrintTypeToString(type); 344b8e80941Smrg fprintf(stderr, "Error building type name for: %s\n", 345b8e80941Smrg type_name); 346b8e80941Smrg return; 347b8e80941Smrg } 348b8e80941Smrg elem_type = LLVMGetElementType(type); 349b8e80941Smrg buf += ret; 350b8e80941Smrg bufsize -= ret; 351b8e80941Smrg } 352b8e80941Smrg switch (LLVMGetTypeKind(elem_type)) { 353b8e80941Smrg default: break; 354b8e80941Smrg case LLVMIntegerTypeKind: 355b8e80941Smrg snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type)); 356b8e80941Smrg break; 357b8e80941Smrg case LLVMHalfTypeKind: 358b8e80941Smrg snprintf(buf, bufsize, "f16"); 359b8e80941Smrg break; 360b8e80941Smrg case LLVMFloatTypeKind: 361b8e80941Smrg snprintf(buf, bufsize, "f32"); 362b8e80941Smrg break; 363b8e80941Smrg case LLVMDoubleTypeKind: 364b8e80941Smrg snprintf(buf, bufsize, "f64"); 365b8e80941Smrg break; 366b8e80941Smrg } 367b8e80941Smrg} 368b8e80941Smrg 369b8e80941Smrg/** 370b8e80941Smrg * Helper function that builds an LLVM IR PHI node and immediately adds 371b8e80941Smrg * incoming edges. 372b8e80941Smrg */ 373b8e80941SmrgLLVMValueRef 374b8e80941Smrgac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, 375b8e80941Smrg unsigned count_incoming, LLVMValueRef *values, 376b8e80941Smrg LLVMBasicBlockRef *blocks) 377b8e80941Smrg{ 378b8e80941Smrg LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, ""); 379b8e80941Smrg LLVMAddIncoming(phi, values, blocks, count_incoming); 380b8e80941Smrg return phi; 381b8e80941Smrg} 382b8e80941Smrg 383b8e80941Smrgvoid ac_build_s_barrier(struct ac_llvm_context *ctx) 384b8e80941Smrg{ 385b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL, 386b8e80941Smrg 0, AC_FUNC_ATTR_CONVERGENT); 387b8e80941Smrg} 388b8e80941Smrg 389b8e80941Smrg/* Prevent optimizations (at least of memory accesses) across the current 390b8e80941Smrg * point in the program by emitting empty inline assembly that is marked as 391b8e80941Smrg * having side effects. 392b8e80941Smrg * 393b8e80941Smrg * Optionally, a value can be passed through the inline assembly to prevent 394b8e80941Smrg * LLVM from hoisting calls to ReadNone functions. 395b8e80941Smrg */ 396b8e80941Smrgvoid 397b8e80941Smrgac_build_optimization_barrier(struct ac_llvm_context *ctx, 398b8e80941Smrg LLVMValueRef *pvgpr) 399b8e80941Smrg{ 400b8e80941Smrg static int counter = 0; 401b8e80941Smrg 402b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 403b8e80941Smrg char code[16]; 404b8e80941Smrg 405b8e80941Smrg snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter)); 406b8e80941Smrg 407b8e80941Smrg if (!pvgpr) { 408b8e80941Smrg LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); 409b8e80941Smrg LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false); 410b8e80941Smrg LLVMBuildCall(builder, inlineasm, NULL, 0, ""); 411b8e80941Smrg } else { 412b8e80941Smrg LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false); 413b8e80941Smrg LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false); 414b8e80941Smrg LLVMValueRef vgpr = *pvgpr; 415b8e80941Smrg LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr); 416b8e80941Smrg unsigned vgpr_size = ac_get_type_size(vgpr_type); 417b8e80941Smrg LLVMValueRef vgpr0; 418b8e80941Smrg 419b8e80941Smrg assert(vgpr_size % 4 == 0); 420b8e80941Smrg 421b8e80941Smrg vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), ""); 422b8e80941Smrg vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, ""); 423b8e80941Smrg vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, ""); 424b8e80941Smrg vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, ""); 425b8e80941Smrg vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, ""); 426b8e80941Smrg 427b8e80941Smrg *pvgpr = vgpr; 428b8e80941Smrg } 429b8e80941Smrg} 430b8e80941Smrg 431b8e80941SmrgLLVMValueRef 432b8e80941Smrgac_build_shader_clock(struct ac_llvm_context *ctx) 433b8e80941Smrg{ 434b8e80941Smrg LLVMValueRef tmp = ac_build_intrinsic(ctx, "llvm.readcyclecounter", 435b8e80941Smrg ctx->i64, NULL, 0, 0); 436b8e80941Smrg return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, ""); 437b8e80941Smrg} 438b8e80941Smrg 439b8e80941SmrgLLVMValueRef 440b8e80941Smrgac_build_ballot(struct ac_llvm_context *ctx, 441b8e80941Smrg LLVMValueRef value) 442b8e80941Smrg{ 443b8e80941Smrg LLVMValueRef args[3] = { 444b8e80941Smrg value, 445b8e80941Smrg ctx->i32_0, 446b8e80941Smrg LLVMConstInt(ctx->i32, LLVMIntNE, 0) 447b8e80941Smrg }; 448b8e80941Smrg 449b8e80941Smrg /* We currently have no other way to prevent LLVM from lifting the icmp 450b8e80941Smrg * calls to a dominating basic block. 451b8e80941Smrg */ 452b8e80941Smrg ac_build_optimization_barrier(ctx, &args[0]); 453b8e80941Smrg 454b8e80941Smrg args[0] = ac_to_integer(ctx, args[0]); 455b8e80941Smrg 456b8e80941Smrg return ac_build_intrinsic(ctx, 457b8e80941Smrg "llvm.amdgcn.icmp.i32", 458b8e80941Smrg ctx->i64, args, 3, 459b8e80941Smrg AC_FUNC_ATTR_NOUNWIND | 460b8e80941Smrg AC_FUNC_ATTR_READNONE | 461b8e80941Smrg AC_FUNC_ATTR_CONVERGENT); 462b8e80941Smrg} 463b8e80941Smrg 464b8e80941SmrgLLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, 465b8e80941Smrg LLVMValueRef value) 466b8e80941Smrg{ 467b8e80941Smrg LLVMValueRef args[3] = { 468b8e80941Smrg value, 469b8e80941Smrg ctx->i1false, 470b8e80941Smrg LLVMConstInt(ctx->i32, LLVMIntNE, 0), 471b8e80941Smrg }; 472b8e80941Smrg 473b8e80941Smrg assert(HAVE_LLVM >= 0x0800); 474b8e80941Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.icmp.i1", ctx->i64, args, 3, 475b8e80941Smrg AC_FUNC_ATTR_NOUNWIND | 476b8e80941Smrg AC_FUNC_ATTR_READNONE | 477b8e80941Smrg AC_FUNC_ATTR_CONVERGENT); 478b8e80941Smrg} 479b8e80941Smrg 480b8e80941SmrgLLVMValueRef 481b8e80941Smrgac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value) 482b8e80941Smrg{ 483b8e80941Smrg LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1); 484b8e80941Smrg LLVMValueRef vote_set = ac_build_ballot(ctx, value); 485b8e80941Smrg return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, ""); 486b8e80941Smrg} 487b8e80941Smrg 488b8e80941SmrgLLVMValueRef 489b8e80941Smrgac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value) 490b8e80941Smrg{ 491b8e80941Smrg LLVMValueRef vote_set = ac_build_ballot(ctx, value); 492b8e80941Smrg return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, 493b8e80941Smrg LLVMConstInt(ctx->i64, 0, 0), ""); 494b8e80941Smrg} 495b8e80941Smrg 496b8e80941SmrgLLVMValueRef 497b8e80941Smrgac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value) 498b8e80941Smrg{ 499b8e80941Smrg LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1); 500b8e80941Smrg LLVMValueRef vote_set = ac_build_ballot(ctx, value); 501b8e80941Smrg 502b8e80941Smrg LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ, 503b8e80941Smrg vote_set, active_set, ""); 504b8e80941Smrg LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ, 505b8e80941Smrg vote_set, 506b8e80941Smrg LLVMConstInt(ctx->i64, 0, 0), ""); 507b8e80941Smrg return LLVMBuildOr(ctx->builder, all, none, ""); 508b8e80941Smrg} 509b8e80941Smrg 510b8e80941SmrgLLVMValueRef 511b8e80941Smrgac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, 512b8e80941Smrg unsigned value_count, unsigned component) 513b8e80941Smrg{ 514b8e80941Smrg LLVMValueRef vec = NULL; 515b8e80941Smrg 516b8e80941Smrg if (value_count == 1) { 517b8e80941Smrg return values[component]; 518b8e80941Smrg } else if (!value_count) 519b8e80941Smrg unreachable("value_count is 0"); 520b8e80941Smrg 521b8e80941Smrg for (unsigned i = component; i < value_count + component; i++) { 522b8e80941Smrg LLVMValueRef value = values[i]; 523b8e80941Smrg 524b8e80941Smrg if (i == component) 525b8e80941Smrg vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count)); 526b8e80941Smrg LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false); 527b8e80941Smrg vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, ""); 528b8e80941Smrg } 529b8e80941Smrg return vec; 530b8e80941Smrg} 531b8e80941Smrg 532b8e80941SmrgLLVMValueRef 533b8e80941Smrgac_build_gather_values_extended(struct ac_llvm_context *ctx, 534b8e80941Smrg LLVMValueRef *values, 535b8e80941Smrg unsigned value_count, 536b8e80941Smrg unsigned value_stride, 537b8e80941Smrg bool load, 538b8e80941Smrg bool always_vector) 539b8e80941Smrg{ 540b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 541b8e80941Smrg LLVMValueRef vec = NULL; 542b8e80941Smrg unsigned i; 543b8e80941Smrg 544b8e80941Smrg if (value_count == 1 && !always_vector) { 545b8e80941Smrg if (load) 546b8e80941Smrg return LLVMBuildLoad(builder, values[0], ""); 547b8e80941Smrg return values[0]; 548b8e80941Smrg } else if (!value_count) 549b8e80941Smrg unreachable("value_count is 0"); 550b8e80941Smrg 551b8e80941Smrg for (i = 0; i < value_count; i++) { 552b8e80941Smrg LLVMValueRef value = values[i * value_stride]; 553b8e80941Smrg if (load) 554b8e80941Smrg value = LLVMBuildLoad(builder, value, ""); 555b8e80941Smrg 556b8e80941Smrg if (!i) 557b8e80941Smrg vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count)); 558b8e80941Smrg LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); 559b8e80941Smrg vec = LLVMBuildInsertElement(builder, vec, value, index, ""); 560b8e80941Smrg } 561b8e80941Smrg return vec; 562b8e80941Smrg} 563b8e80941Smrg 564b8e80941SmrgLLVMValueRef 565b8e80941Smrgac_build_gather_values(struct ac_llvm_context *ctx, 566b8e80941Smrg LLVMValueRef *values, 567b8e80941Smrg unsigned value_count) 568b8e80941Smrg{ 569b8e80941Smrg return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false); 570b8e80941Smrg} 571b8e80941Smrg 572b8e80941Smrg/* Expand a scalar or vector to <dst_channels x type> by filling the remaining 573b8e80941Smrg * channels with undef. Extract at most src_channels components from the input. 574b8e80941Smrg */ 575b8e80941Smrgstatic LLVMValueRef 576b8e80941Smrgac_build_expand(struct ac_llvm_context *ctx, 577b8e80941Smrg LLVMValueRef value, 578b8e80941Smrg unsigned src_channels, 579b8e80941Smrg unsigned dst_channels) 580b8e80941Smrg{ 581b8e80941Smrg LLVMTypeRef elemtype; 582b8e80941Smrg LLVMValueRef chan[dst_channels]; 583b8e80941Smrg 584b8e80941Smrg if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) { 585b8e80941Smrg unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value)); 586b8e80941Smrg 587b8e80941Smrg if (src_channels == dst_channels && vec_size == dst_channels) 588b8e80941Smrg return value; 589b8e80941Smrg 590b8e80941Smrg src_channels = MIN2(src_channels, vec_size); 591b8e80941Smrg 592b8e80941Smrg for (unsigned i = 0; i < src_channels; i++) 593b8e80941Smrg chan[i] = ac_llvm_extract_elem(ctx, value, i); 594b8e80941Smrg 595b8e80941Smrg elemtype = LLVMGetElementType(LLVMTypeOf(value)); 596b8e80941Smrg } else { 597b8e80941Smrg if (src_channels) { 598b8e80941Smrg assert(src_channels == 1); 599b8e80941Smrg chan[0] = value; 600b8e80941Smrg } 601b8e80941Smrg elemtype = LLVMTypeOf(value); 602b8e80941Smrg } 603b8e80941Smrg 604b8e80941Smrg for (unsigned i = src_channels; i < dst_channels; i++) 605b8e80941Smrg chan[i] = LLVMGetUndef(elemtype); 606b8e80941Smrg 607b8e80941Smrg return ac_build_gather_values(ctx, chan, dst_channels); 608b8e80941Smrg} 609b8e80941Smrg 610b8e80941Smrg/* Expand a scalar or vector to <4 x type> by filling the remaining channels 611b8e80941Smrg * with undef. Extract at most num_channels components from the input. 612b8e80941Smrg */ 613b8e80941SmrgLLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, 614b8e80941Smrg LLVMValueRef value, 615b8e80941Smrg unsigned num_channels) 616b8e80941Smrg{ 617b8e80941Smrg return ac_build_expand(ctx, value, num_channels, 4); 618b8e80941Smrg} 619b8e80941Smrg 620b8e80941SmrgLLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value) 621b8e80941Smrg{ 622b8e80941Smrg unsigned type_size = ac_get_type_size(LLVMTypeOf(value)); 623b8e80941Smrg const char *name; 624b8e80941Smrg 625b8e80941Smrg if (type_size == 2) 626b8e80941Smrg name = "llvm.rint.f16"; 627b8e80941Smrg else if (type_size == 4) 628b8e80941Smrg name = "llvm.rint.f32"; 629b8e80941Smrg else 630b8e80941Smrg name = "llvm.rint.f64"; 631b8e80941Smrg 632b8e80941Smrg return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1, 633b8e80941Smrg AC_FUNC_ATTR_READNONE); 634b8e80941Smrg} 635b8e80941Smrg 636b8e80941SmrgLLVMValueRef 637b8e80941Smrgac_build_fdiv(struct ac_llvm_context *ctx, 638b8e80941Smrg LLVMValueRef num, 639b8e80941Smrg LLVMValueRef den) 640b8e80941Smrg{ 641b8e80941Smrg /* If we do (num / den), LLVM >= 7.0 does: 642b8e80941Smrg * return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f)); 643b8e80941Smrg * 644b8e80941Smrg * If we do (num * (1 / den)), LLVM does: 645b8e80941Smrg * return num * v_rcp_f32(den); 646b8e80941Smrg */ 647b8e80941Smrg LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0); 648b8e80941Smrg LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, ""); 649b8e80941Smrg LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, ""); 650b8e80941Smrg 651b8e80941Smrg /* Use v_rcp_f32 instead of precise division. */ 652b8e80941Smrg if (!LLVMIsConstant(ret)) 653b8e80941Smrg LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp); 654b8e80941Smrg return ret; 655b8e80941Smrg} 656b8e80941Smrg 657b8e80941Smrg/* See fast_idiv_by_const.h. */ 658b8e80941Smrg/* Set: increment = util_fast_udiv_info::increment ? multiplier : 0; */ 659b8e80941SmrgLLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, 660b8e80941Smrg LLVMValueRef num, 661b8e80941Smrg LLVMValueRef multiplier, 662b8e80941Smrg LLVMValueRef pre_shift, 663b8e80941Smrg LLVMValueRef post_shift, 664b8e80941Smrg LLVMValueRef increment) 665b8e80941Smrg{ 666b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 667b8e80941Smrg 668b8e80941Smrg num = LLVMBuildLShr(builder, num, pre_shift, ""); 669b8e80941Smrg num = LLVMBuildMul(builder, 670b8e80941Smrg LLVMBuildZExt(builder, num, ctx->i64, ""), 671b8e80941Smrg LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); 672b8e80941Smrg num = LLVMBuildAdd(builder, num, 673b8e80941Smrg LLVMBuildZExt(builder, increment, ctx->i64, ""), ""); 674b8e80941Smrg num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); 675b8e80941Smrg num = LLVMBuildTrunc(builder, num, ctx->i32, ""); 676b8e80941Smrg return LLVMBuildLShr(builder, num, post_shift, ""); 677b8e80941Smrg} 678b8e80941Smrg 679b8e80941Smrg/* See fast_idiv_by_const.h. */ 680b8e80941Smrg/* If num != UINT_MAX, this more efficient version can be used. */ 681b8e80941Smrg/* Set: increment = util_fast_udiv_info::increment; */ 682b8e80941SmrgLLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, 683b8e80941Smrg LLVMValueRef num, 684b8e80941Smrg LLVMValueRef multiplier, 685b8e80941Smrg LLVMValueRef pre_shift, 686b8e80941Smrg LLVMValueRef post_shift, 687b8e80941Smrg LLVMValueRef increment) 688b8e80941Smrg{ 689b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 690b8e80941Smrg 691b8e80941Smrg num = LLVMBuildLShr(builder, num, pre_shift, ""); 692b8e80941Smrg num = LLVMBuildNUWAdd(builder, num, increment, ""); 693b8e80941Smrg num = LLVMBuildMul(builder, 694b8e80941Smrg LLVMBuildZExt(builder, num, ctx->i64, ""), 695b8e80941Smrg LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); 696b8e80941Smrg num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); 697b8e80941Smrg num = LLVMBuildTrunc(builder, num, ctx->i32, ""); 698b8e80941Smrg return LLVMBuildLShr(builder, num, post_shift, ""); 699b8e80941Smrg} 700b8e80941Smrg 701b8e80941Smrg/* See fast_idiv_by_const.h. */ 702b8e80941Smrg/* Both operands must fit in 31 bits and the divisor must not be 1. */ 703b8e80941SmrgLLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, 704b8e80941Smrg LLVMValueRef num, 705b8e80941Smrg LLVMValueRef multiplier, 706b8e80941Smrg LLVMValueRef post_shift) 707b8e80941Smrg{ 708b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 709b8e80941Smrg 710b8e80941Smrg num = LLVMBuildMul(builder, 711b8e80941Smrg LLVMBuildZExt(builder, num, ctx->i64, ""), 712b8e80941Smrg LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); 713b8e80941Smrg num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); 714b8e80941Smrg num = LLVMBuildTrunc(builder, num, ctx->i32, ""); 715b8e80941Smrg return LLVMBuildLShr(builder, num, post_shift, ""); 716b8e80941Smrg} 717b8e80941Smrg 718b8e80941Smrg/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 719b8e80941Smrg * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is 720b8e80941Smrg * already multiplied by two. id is the cube face number. 721b8e80941Smrg */ 722b8e80941Smrgstruct cube_selection_coords { 723b8e80941Smrg LLVMValueRef stc[2]; 724b8e80941Smrg LLVMValueRef ma; 725b8e80941Smrg LLVMValueRef id; 726b8e80941Smrg}; 727b8e80941Smrg 728b8e80941Smrgstatic void 729b8e80941Smrgbuild_cube_intrinsic(struct ac_llvm_context *ctx, 730b8e80941Smrg LLVMValueRef in[3], 731b8e80941Smrg struct cube_selection_coords *out) 732b8e80941Smrg{ 733b8e80941Smrg LLVMTypeRef f32 = ctx->f32; 734b8e80941Smrg 735b8e80941Smrg out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", 736b8e80941Smrg f32, in, 3, AC_FUNC_ATTR_READNONE); 737b8e80941Smrg out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", 738b8e80941Smrg f32, in, 3, AC_FUNC_ATTR_READNONE); 739b8e80941Smrg out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", 740b8e80941Smrg f32, in, 3, AC_FUNC_ATTR_READNONE); 741b8e80941Smrg out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", 742b8e80941Smrg f32, in, 3, AC_FUNC_ATTR_READNONE); 743b8e80941Smrg} 744b8e80941Smrg 745b8e80941Smrg/** 746b8e80941Smrg * Build a manual selection sequence for cube face sc/tc coordinates and 747b8e80941Smrg * major axis vector (multiplied by 2 for consistency) for the given 748b8e80941Smrg * vec3 \p coords, for the face implied by \p selcoords. 749b8e80941Smrg * 750b8e80941Smrg * For the major axis, we always adjust the sign to be in the direction of 751b8e80941Smrg * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards 752b8e80941Smrg * the selcoords major axis. 753b8e80941Smrg */ 754b8e80941Smrgstatic void build_cube_select(struct ac_llvm_context *ctx, 755b8e80941Smrg const struct cube_selection_coords *selcoords, 756b8e80941Smrg const LLVMValueRef *coords, 757b8e80941Smrg LLVMValueRef *out_st, 758b8e80941Smrg LLVMValueRef *out_ma) 759b8e80941Smrg{ 760b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 761b8e80941Smrg LLVMTypeRef f32 = LLVMTypeOf(coords[0]); 762b8e80941Smrg LLVMValueRef is_ma_positive; 763b8e80941Smrg LLVMValueRef sgn_ma; 764b8e80941Smrg LLVMValueRef is_ma_z, is_not_ma_z; 765b8e80941Smrg LLVMValueRef is_ma_y; 766b8e80941Smrg LLVMValueRef is_ma_x; 767b8e80941Smrg LLVMValueRef sgn; 768b8e80941Smrg LLVMValueRef tmp; 769b8e80941Smrg 770b8e80941Smrg is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, 771b8e80941Smrg selcoords->ma, LLVMConstReal(f32, 0.0), ""); 772b8e80941Smrg sgn_ma = LLVMBuildSelect(builder, is_ma_positive, 773b8e80941Smrg LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), ""); 774b8e80941Smrg 775b8e80941Smrg is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), ""); 776b8e80941Smrg is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); 777b8e80941Smrg is_ma_y = LLVMBuildAnd(builder, is_not_ma_z, 778b8e80941Smrg LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), ""); 779b8e80941Smrg is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), ""); 780b8e80941Smrg 781b8e80941Smrg /* Select sc */ 782b8e80941Smrg tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], ""); 783b8e80941Smrg sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0), 784b8e80941Smrg LLVMBuildSelect(builder, is_ma_z, sgn_ma, 785b8e80941Smrg LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); 786b8e80941Smrg out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); 787b8e80941Smrg 788b8e80941Smrg /* Select tc */ 789b8e80941Smrg tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); 790b8e80941Smrg sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma, 791b8e80941Smrg LLVMConstReal(f32, -1.0), ""); 792b8e80941Smrg out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); 793b8e80941Smrg 794b8e80941Smrg /* Select ma */ 795b8e80941Smrg tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], 796b8e80941Smrg LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), ""); 797b8e80941Smrg tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32", 798b8e80941Smrg ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE); 799b8e80941Smrg *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), ""); 800b8e80941Smrg} 801b8e80941Smrg 802b8e80941Smrgvoid 803b8e80941Smrgac_prepare_cube_coords(struct ac_llvm_context *ctx, 804b8e80941Smrg bool is_deriv, bool is_array, bool is_lod, 805b8e80941Smrg LLVMValueRef *coords_arg, 806b8e80941Smrg LLVMValueRef *derivs_arg) 807b8e80941Smrg{ 808b8e80941Smrg 809b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 810b8e80941Smrg struct cube_selection_coords selcoords; 811b8e80941Smrg LLVMValueRef coords[3]; 812b8e80941Smrg LLVMValueRef invma; 813b8e80941Smrg 814b8e80941Smrg if (is_array && !is_lod) { 815b8e80941Smrg LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]); 816b8e80941Smrg 817b8e80941Smrg /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says: 818b8e80941Smrg * 819b8e80941Smrg * "For Array forms, the array layer used will be 820b8e80941Smrg * 821b8e80941Smrg * max(0, min(d−1, floor(layer+0.5))) 822b8e80941Smrg * 823b8e80941Smrg * where d is the depth of the texture array and layer 824b8e80941Smrg * comes from the component indicated in the tables below. 825b8e80941Smrg * Workaroudn for an issue where the layer is taken from a 826b8e80941Smrg * helper invocation which happens to fall on a different 827b8e80941Smrg * layer due to extrapolation." 828b8e80941Smrg * 829b8e80941Smrg * VI and earlier attempt to implement this in hardware by 830b8e80941Smrg * clamping the value of coords[2] = (8 * layer) + face. 831b8e80941Smrg * Unfortunately, this means that the we end up with the wrong 832b8e80941Smrg * face when clamping occurs. 833b8e80941Smrg * 834b8e80941Smrg * Clamp the layer earlier to work around the issue. 835b8e80941Smrg */ 836b8e80941Smrg if (ctx->chip_class <= VI) { 837b8e80941Smrg LLVMValueRef ge0; 838b8e80941Smrg ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, ""); 839b8e80941Smrg tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, ""); 840b8e80941Smrg } 841b8e80941Smrg 842b8e80941Smrg coords_arg[3] = tmp; 843b8e80941Smrg } 844b8e80941Smrg 845b8e80941Smrg build_cube_intrinsic(ctx, coords_arg, &selcoords); 846b8e80941Smrg 847b8e80941Smrg invma = ac_build_intrinsic(ctx, "llvm.fabs.f32", 848b8e80941Smrg ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); 849b8e80941Smrg invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); 850b8e80941Smrg 851b8e80941Smrg for (int i = 0; i < 2; ++i) 852b8e80941Smrg coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, ""); 853b8e80941Smrg 854b8e80941Smrg coords[2] = selcoords.id; 855b8e80941Smrg 856b8e80941Smrg if (is_deriv && derivs_arg) { 857b8e80941Smrg LLVMValueRef derivs[4]; 858b8e80941Smrg int axis; 859b8e80941Smrg 860b8e80941Smrg /* Convert cube derivatives to 2D derivatives. */ 861b8e80941Smrg for (axis = 0; axis < 2; axis++) { 862b8e80941Smrg LLVMValueRef deriv_st[2]; 863b8e80941Smrg LLVMValueRef deriv_ma; 864b8e80941Smrg 865b8e80941Smrg /* Transform the derivative alongside the texture 866b8e80941Smrg * coordinate. Mathematically, the correct formula is 867b8e80941Smrg * as follows. Assume we're projecting onto the +Z face 868b8e80941Smrg * and denote by dx/dh the derivative of the (original) 869b8e80941Smrg * X texture coordinate with respect to horizontal 870b8e80941Smrg * window coordinates. The projection onto the +Z face 871b8e80941Smrg * plane is: 872b8e80941Smrg * 873b8e80941Smrg * f(x,z) = x/z 874b8e80941Smrg * 875b8e80941Smrg * Then df/dh = df/dx * dx/dh + df/dz * dz/dh 876b8e80941Smrg * = 1/z * dx/dh - x/z * 1/z * dz/dh. 877b8e80941Smrg * 878b8e80941Smrg * This motivatives the implementation below. 879b8e80941Smrg * 880b8e80941Smrg * Whether this actually gives the expected results for 881b8e80941Smrg * apps that might feed in derivatives obtained via 882b8e80941Smrg * finite differences is anyone's guess. The OpenGL spec 883b8e80941Smrg * seems awfully quiet about how textureGrad for cube 884b8e80941Smrg * maps should be handled. 885b8e80941Smrg */ 886b8e80941Smrg build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3], 887b8e80941Smrg deriv_st, &deriv_ma); 888b8e80941Smrg 889b8e80941Smrg deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, ""); 890b8e80941Smrg 891b8e80941Smrg for (int i = 0; i < 2; ++i) 892b8e80941Smrg derivs[axis * 2 + i] = 893b8e80941Smrg LLVMBuildFSub(builder, 894b8e80941Smrg LLVMBuildFMul(builder, deriv_st[i], invma, ""), 895b8e80941Smrg LLVMBuildFMul(builder, deriv_ma, coords[i], ""), ""); 896b8e80941Smrg } 897b8e80941Smrg 898b8e80941Smrg memcpy(derivs_arg, derivs, sizeof(derivs)); 899b8e80941Smrg } 900b8e80941Smrg 901b8e80941Smrg /* Shift the texture coordinate. This must be applied after the 902b8e80941Smrg * derivative calculation. 903b8e80941Smrg */ 904b8e80941Smrg for (int i = 0; i < 2; ++i) 905b8e80941Smrg coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), ""); 906b8e80941Smrg 907b8e80941Smrg if (is_array) { 908b8e80941Smrg /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ 909b8e80941Smrg /* coords_arg.w component - array_index for cube arrays */ 910b8e80941Smrg coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]); 911b8e80941Smrg } 912b8e80941Smrg 913b8e80941Smrg memcpy(coords_arg, coords, sizeof(coords)); 914b8e80941Smrg} 915b8e80941Smrg 916b8e80941Smrg 917b8e80941SmrgLLVMValueRef 918b8e80941Smrgac_build_fs_interp(struct ac_llvm_context *ctx, 919b8e80941Smrg LLVMValueRef llvm_chan, 920b8e80941Smrg LLVMValueRef attr_number, 921b8e80941Smrg LLVMValueRef params, 922b8e80941Smrg LLVMValueRef i, 923b8e80941Smrg LLVMValueRef j) 924b8e80941Smrg{ 925b8e80941Smrg LLVMValueRef args[5]; 926b8e80941Smrg LLVMValueRef p1; 927b8e80941Smrg 928b8e80941Smrg args[0] = i; 929b8e80941Smrg args[1] = llvm_chan; 930b8e80941Smrg args[2] = attr_number; 931b8e80941Smrg args[3] = params; 932b8e80941Smrg 933b8e80941Smrg p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1", 934b8e80941Smrg ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); 935b8e80941Smrg 936b8e80941Smrg args[0] = p1; 937b8e80941Smrg args[1] = j; 938b8e80941Smrg args[2] = llvm_chan; 939b8e80941Smrg args[3] = attr_number; 940b8e80941Smrg args[4] = params; 941b8e80941Smrg 942b8e80941Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2", 943b8e80941Smrg ctx->f32, args, 5, AC_FUNC_ATTR_READNONE); 944b8e80941Smrg} 945b8e80941Smrg 946b8e80941SmrgLLVMValueRef 947b8e80941Smrgac_build_fs_interp_f16(struct ac_llvm_context *ctx, 948b8e80941Smrg LLVMValueRef llvm_chan, 949b8e80941Smrg LLVMValueRef attr_number, 950b8e80941Smrg LLVMValueRef params, 951b8e80941Smrg LLVMValueRef i, 952b8e80941Smrg LLVMValueRef j) 953b8e80941Smrg{ 954b8e80941Smrg LLVMValueRef args[6]; 955b8e80941Smrg LLVMValueRef p1; 956b8e80941Smrg 957b8e80941Smrg args[0] = i; 958b8e80941Smrg args[1] = llvm_chan; 959b8e80941Smrg args[2] = attr_number; 960b8e80941Smrg args[3] = ctx->i1false; 961b8e80941Smrg args[4] = params; 962b8e80941Smrg 963b8e80941Smrg p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", 964b8e80941Smrg ctx->f32, args, 5, AC_FUNC_ATTR_READNONE); 965b8e80941Smrg 966b8e80941Smrg args[0] = p1; 967b8e80941Smrg args[1] = j; 968b8e80941Smrg args[2] = llvm_chan; 969b8e80941Smrg args[3] = attr_number; 970b8e80941Smrg args[4] = ctx->i1false; 971b8e80941Smrg args[5] = params; 972b8e80941Smrg 973b8e80941Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", 974b8e80941Smrg ctx->f16, args, 6, AC_FUNC_ATTR_READNONE); 975b8e80941Smrg} 976b8e80941Smrg 977b8e80941SmrgLLVMValueRef 978b8e80941Smrgac_build_fs_interp_mov(struct ac_llvm_context *ctx, 979b8e80941Smrg LLVMValueRef parameter, 980b8e80941Smrg LLVMValueRef llvm_chan, 981b8e80941Smrg LLVMValueRef attr_number, 982b8e80941Smrg LLVMValueRef params) 983b8e80941Smrg{ 984b8e80941Smrg LLVMValueRef args[4]; 985b8e80941Smrg 986b8e80941Smrg args[0] = parameter; 987b8e80941Smrg args[1] = llvm_chan; 988b8e80941Smrg args[2] = attr_number; 989b8e80941Smrg args[3] = params; 990b8e80941Smrg 991b8e80941Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov", 992b8e80941Smrg ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); 993b8e80941Smrg} 994b8e80941Smrg 995b8e80941SmrgLLVMValueRef 996b8e80941Smrgac_build_gep_ptr(struct ac_llvm_context *ctx, 997b8e80941Smrg LLVMValueRef base_ptr, 998b8e80941Smrg LLVMValueRef index) 999b8e80941Smrg{ 1000b8e80941Smrg return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, ""); 1001b8e80941Smrg} 1002b8e80941Smrg 1003b8e80941SmrgLLVMValueRef 1004b8e80941Smrgac_build_gep0(struct ac_llvm_context *ctx, 1005b8e80941Smrg LLVMValueRef base_ptr, 1006b8e80941Smrg LLVMValueRef index) 1007b8e80941Smrg{ 1008b8e80941Smrg LLVMValueRef indices[2] = { 1009b8e80941Smrg ctx->i32_0, 1010b8e80941Smrg index, 1011b8e80941Smrg }; 1012b8e80941Smrg return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, ""); 1013b8e80941Smrg} 1014b8e80941Smrg 1015b8e80941SmrgLLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr, 1016b8e80941Smrg LLVMValueRef index) 1017b8e80941Smrg{ 1018b8e80941Smrg return LLVMBuildPointerCast(ctx->builder, 1019b8e80941Smrg ac_build_gep0(ctx, ptr, index), 1020b8e80941Smrg LLVMTypeOf(ptr), ""); 1021b8e80941Smrg} 1022b8e80941Smrg 1023b8e80941Smrgvoid 1024b8e80941Smrgac_build_indexed_store(struct ac_llvm_context *ctx, 1025b8e80941Smrg LLVMValueRef base_ptr, LLVMValueRef index, 1026b8e80941Smrg LLVMValueRef value) 1027b8e80941Smrg{ 1028b8e80941Smrg LLVMBuildStore(ctx->builder, value, 1029b8e80941Smrg ac_build_gep0(ctx, base_ptr, index)); 1030b8e80941Smrg} 1031b8e80941Smrg 1032b8e80941Smrg/** 1033b8e80941Smrg * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad. 1034b8e80941Smrg * It's equivalent to doing a load from &base_ptr[index]. 1035b8e80941Smrg * 1036b8e80941Smrg * \param base_ptr Where the array starts. 1037b8e80941Smrg * \param index The element index into the array. 1038b8e80941Smrg * \param uniform Whether the base_ptr and index can be assumed to be 1039b8e80941Smrg * dynamically uniform (i.e. load to an SGPR) 1040b8e80941Smrg * \param invariant Whether the load is invariant (no other opcodes affect it) 1041b8e80941Smrg * \param no_unsigned_wraparound 1042b8e80941Smrg * For all possible re-associations and re-distributions of an expression 1043b8e80941Smrg * "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs 1044b8e80941Smrg * without inbounds in base_ptr), this parameter is true if "addr + offset" 1045b8e80941Smrg * does not result in an unsigned integer wraparound. This is used for 1046b8e80941Smrg * optimal code generation of 32-bit pointer arithmetic. 1047b8e80941Smrg * 1048b8e80941Smrg * For example, a 32-bit immediate offset that causes a 32-bit unsigned 1049b8e80941Smrg * integer wraparound can't be an imm offset in s_load_dword, because 1050b8e80941Smrg * the instruction performs "addr + offset" in 64 bits. 1051b8e80941Smrg * 1052b8e80941Smrg * Expected usage for bindless textures by chaining GEPs: 1053b8e80941Smrg * // possible unsigned wraparound, don't use InBounds: 1054b8e80941Smrg * ptr1 = LLVMBuildGEP(base_ptr, index); 1055b8e80941Smrg * image = load(ptr1); // becomes "s_load ptr1, 0" 1056b8e80941Smrg * 1057b8e80941Smrg * ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize); 1058b8e80941Smrg * sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds 1059b8e80941Smrg */ 1060b8e80941Smrgstatic LLVMValueRef 1061b8e80941Smrgac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, 1062b8e80941Smrg LLVMValueRef index, bool uniform, bool invariant, 1063b8e80941Smrg bool no_unsigned_wraparound) 1064b8e80941Smrg{ 1065b8e80941Smrg LLVMValueRef pointer, result; 1066b8e80941Smrg LLVMValueRef indices[2] = {ctx->i32_0, index}; 1067b8e80941Smrg 1068b8e80941Smrg if (no_unsigned_wraparound && 1069b8e80941Smrg LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT) 1070b8e80941Smrg pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, indices, 2, ""); 1071b8e80941Smrg else 1072b8e80941Smrg pointer = LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, ""); 1073b8e80941Smrg 1074b8e80941Smrg if (uniform) 1075b8e80941Smrg LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); 1076b8e80941Smrg result = LLVMBuildLoad(ctx->builder, pointer, ""); 1077b8e80941Smrg if (invariant) 1078b8e80941Smrg LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); 1079b8e80941Smrg return result; 1080b8e80941Smrg} 1081b8e80941Smrg 1082b8e80941SmrgLLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, 1083b8e80941Smrg LLVMValueRef index) 1084b8e80941Smrg{ 1085b8e80941Smrg return ac_build_load_custom(ctx, base_ptr, index, false, false, false); 1086b8e80941Smrg} 1087b8e80941Smrg 1088b8e80941SmrgLLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, 1089b8e80941Smrg LLVMValueRef base_ptr, LLVMValueRef index) 1090b8e80941Smrg{ 1091b8e80941Smrg return ac_build_load_custom(ctx, base_ptr, index, false, true, false); 1092b8e80941Smrg} 1093b8e80941Smrg 1094b8e80941Smrg/* This assumes that there is no unsigned integer wraparound during the address 1095b8e80941Smrg * computation, excluding all GEPs within base_ptr. */ 1096b8e80941SmrgLLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, 1097b8e80941Smrg LLVMValueRef base_ptr, LLVMValueRef index) 1098b8e80941Smrg{ 1099b8e80941Smrg return ac_build_load_custom(ctx, base_ptr, index, true, true, true); 1100b8e80941Smrg} 1101b8e80941Smrg 1102b8e80941Smrg/* See ac_build_load_custom() documentation. */ 1103b8e80941SmrgLLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx, 1104b8e80941Smrg LLVMValueRef base_ptr, LLVMValueRef index) 1105b8e80941Smrg{ 1106b8e80941Smrg return ac_build_load_custom(ctx, base_ptr, index, true, true, false); 1107b8e80941Smrg} 1108b8e80941Smrg 1109b8e80941Smrgstatic void 1110b8e80941Smrgac_build_buffer_store_common(struct ac_llvm_context *ctx, 1111b8e80941Smrg LLVMValueRef rsrc, 1112b8e80941Smrg LLVMValueRef data, 1113b8e80941Smrg LLVMValueRef vindex, 1114b8e80941Smrg LLVMValueRef voffset, 1115b8e80941Smrg unsigned num_channels, 1116b8e80941Smrg bool glc, 1117b8e80941Smrg bool slc, 1118b8e80941Smrg bool writeonly_memory, 1119b8e80941Smrg bool use_format) 1120b8e80941Smrg{ 1121b8e80941Smrg LLVMValueRef args[] = { 1122b8e80941Smrg data, 1123b8e80941Smrg LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), 1124b8e80941Smrg vindex ? vindex : ctx->i32_0, 1125b8e80941Smrg voffset, 1126b8e80941Smrg LLVMConstInt(ctx->i1, glc, 0), 1127b8e80941Smrg LLVMConstInt(ctx->i1, slc, 0) 1128b8e80941Smrg }; 1129b8e80941Smrg unsigned func = CLAMP(num_channels, 1, 3) - 1; 1130b8e80941Smrg 1131b8e80941Smrg const char *type_names[] = {"f32", "v2f32", "v4f32"}; 1132b8e80941Smrg char name[256]; 1133b8e80941Smrg 1134b8e80941Smrg if (use_format) { 1135b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.format.%s", 1136b8e80941Smrg type_names[func]); 1137b8e80941Smrg } else { 1138b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s", 1139b8e80941Smrg type_names[func]); 1140b8e80941Smrg } 1141b8e80941Smrg 1142b8e80941Smrg ac_build_intrinsic(ctx, name, ctx->voidt, args, ARRAY_SIZE(args), 1143b8e80941Smrg ac_get_store_intr_attribs(writeonly_memory)); 1144b8e80941Smrg} 1145b8e80941Smrg 1146b8e80941Smrgstatic void 1147b8e80941Smrgac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx, 1148b8e80941Smrg LLVMValueRef rsrc, 1149b8e80941Smrg LLVMValueRef data, 1150b8e80941Smrg LLVMValueRef vindex, 1151b8e80941Smrg LLVMValueRef voffset, 1152b8e80941Smrg LLVMValueRef soffset, 1153b8e80941Smrg unsigned num_channels, 1154b8e80941Smrg LLVMTypeRef return_channel_type, 1155b8e80941Smrg bool glc, 1156b8e80941Smrg bool slc, 1157b8e80941Smrg bool writeonly_memory, 1158b8e80941Smrg bool use_format, 1159b8e80941Smrg bool structurized) 1160b8e80941Smrg{ 1161b8e80941Smrg LLVMValueRef args[6]; 1162b8e80941Smrg int idx = 0; 1163b8e80941Smrg args[idx++] = data; 1164b8e80941Smrg args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); 1165b8e80941Smrg if (structurized) 1166b8e80941Smrg args[idx++] = vindex ? vindex : ctx->i32_0; 1167b8e80941Smrg args[idx++] = voffset ? voffset : ctx->i32_0; 1168b8e80941Smrg args[idx++] = soffset ? soffset : ctx->i32_0; 1169b8e80941Smrg args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); 1170b8e80941Smrg unsigned func = num_channels == 3 ? 4 : num_channels; 1171b8e80941Smrg const char *indexing_kind = structurized ? "struct" : "raw"; 1172b8e80941Smrg char name[256], type_name[8]; 1173b8e80941Smrg 1174b8e80941Smrg LLVMTypeRef type = func > 1 ? LLVMVectorType(return_channel_type, func) : return_channel_type; 1175b8e80941Smrg ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); 1176b8e80941Smrg 1177b8e80941Smrg if (use_format) { 1178b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s", 1179b8e80941Smrg indexing_kind, type_name); 1180b8e80941Smrg } else { 1181b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s", 1182b8e80941Smrg indexing_kind, type_name); 1183b8e80941Smrg } 1184b8e80941Smrg 1185b8e80941Smrg ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, 1186b8e80941Smrg ac_get_store_intr_attribs(writeonly_memory)); 1187b8e80941Smrg} 1188b8e80941Smrg 1189b8e80941Smrgvoid 1190b8e80941Smrgac_build_buffer_store_format(struct ac_llvm_context *ctx, 1191b8e80941Smrg LLVMValueRef rsrc, 1192b8e80941Smrg LLVMValueRef data, 1193b8e80941Smrg LLVMValueRef vindex, 1194b8e80941Smrg LLVMValueRef voffset, 1195b8e80941Smrg unsigned num_channels, 1196b8e80941Smrg bool glc, 1197b8e80941Smrg bool writeonly_memory) 1198b8e80941Smrg{ 1199b8e80941Smrg if (HAVE_LLVM >= 0x800) { 1200b8e80941Smrg ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex, 1201b8e80941Smrg voffset, NULL, num_channels, 1202b8e80941Smrg ctx->f32, glc, false, 1203b8e80941Smrg writeonly_memory, true, true); 1204b8e80941Smrg } else { 1205b8e80941Smrg ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, 1206b8e80941Smrg num_channels, glc, false, 1207b8e80941Smrg writeonly_memory, true); 1208b8e80941Smrg } 1209b8e80941Smrg} 1210b8e80941Smrg 1211b8e80941Smrg/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. 1212b8e80941Smrg * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), 1213b8e80941Smrg * or v4i32 (num_channels=3,4). 1214b8e80941Smrg */ 1215b8e80941Smrgvoid 1216b8e80941Smrgac_build_buffer_store_dword(struct ac_llvm_context *ctx, 1217b8e80941Smrg LLVMValueRef rsrc, 1218b8e80941Smrg LLVMValueRef vdata, 1219b8e80941Smrg unsigned num_channels, 1220b8e80941Smrg LLVMValueRef voffset, 1221b8e80941Smrg LLVMValueRef soffset, 1222b8e80941Smrg unsigned inst_offset, 1223b8e80941Smrg bool glc, 1224b8e80941Smrg bool slc, 1225b8e80941Smrg bool writeonly_memory, 1226b8e80941Smrg bool swizzle_enable_hint) 1227b8e80941Smrg{ 1228b8e80941Smrg /* Split 3 channel stores, becase LLVM doesn't support 3-channel 1229b8e80941Smrg * intrinsics. */ 1230b8e80941Smrg if (num_channels == 3) { 1231b8e80941Smrg LLVMValueRef v[3], v01; 1232b8e80941Smrg 1233b8e80941Smrg for (int i = 0; i < 3; i++) { 1234b8e80941Smrg v[i] = LLVMBuildExtractElement(ctx->builder, vdata, 1235b8e80941Smrg LLVMConstInt(ctx->i32, i, 0), ""); 1236b8e80941Smrg } 1237b8e80941Smrg v01 = ac_build_gather_values(ctx, v, 2); 1238b8e80941Smrg 1239b8e80941Smrg ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset, 1240b8e80941Smrg soffset, inst_offset, glc, slc, 1241b8e80941Smrg writeonly_memory, swizzle_enable_hint); 1242b8e80941Smrg ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset, 1243b8e80941Smrg soffset, inst_offset + 8, 1244b8e80941Smrg glc, slc, 1245b8e80941Smrg writeonly_memory, swizzle_enable_hint); 1246b8e80941Smrg return; 1247b8e80941Smrg } 1248b8e80941Smrg 1249b8e80941Smrg /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset 1250b8e80941Smrg * (voffset is swizzled, but soffset isn't swizzled). 1251b8e80941Smrg * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter. 1252b8e80941Smrg */ 1253b8e80941Smrg if (!swizzle_enable_hint) { 1254b8e80941Smrg LLVMValueRef offset = soffset; 1255b8e80941Smrg 1256b8e80941Smrg if (inst_offset) 1257b8e80941Smrg offset = LLVMBuildAdd(ctx->builder, offset, 1258b8e80941Smrg LLVMConstInt(ctx->i32, inst_offset, 0), ""); 1259b8e80941Smrg 1260b8e80941Smrg if (HAVE_LLVM >= 0x800) { 1261b8e80941Smrg ac_build_llvm8_buffer_store_common(ctx, rsrc, 1262b8e80941Smrg ac_to_float(ctx, vdata), 1263b8e80941Smrg ctx->i32_0, 1264b8e80941Smrg voffset, offset, 1265b8e80941Smrg num_channels, 1266b8e80941Smrg ctx->f32, 1267b8e80941Smrg glc, slc, 1268b8e80941Smrg writeonly_memory, 1269b8e80941Smrg false, false); 1270b8e80941Smrg } else { 1271b8e80941Smrg if (voffset) 1272b8e80941Smrg offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); 1273b8e80941Smrg 1274b8e80941Smrg ac_build_buffer_store_common(ctx, rsrc, 1275b8e80941Smrg ac_to_float(ctx, vdata), 1276b8e80941Smrg ctx->i32_0, offset, 1277b8e80941Smrg num_channels, glc, slc, 1278b8e80941Smrg writeonly_memory, false); 1279b8e80941Smrg } 1280b8e80941Smrg return; 1281b8e80941Smrg } 1282b8e80941Smrg 1283b8e80941Smrg static const unsigned dfmts[] = { 1284b8e80941Smrg V_008F0C_BUF_DATA_FORMAT_32, 1285b8e80941Smrg V_008F0C_BUF_DATA_FORMAT_32_32, 1286b8e80941Smrg V_008F0C_BUF_DATA_FORMAT_32_32_32, 1287b8e80941Smrg V_008F0C_BUF_DATA_FORMAT_32_32_32_32 1288b8e80941Smrg }; 1289b8e80941Smrg unsigned dfmt = dfmts[num_channels - 1]; 1290b8e80941Smrg unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; 1291b8e80941Smrg LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0); 1292b8e80941Smrg 1293b8e80941Smrg ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, 1294b8e80941Smrg immoffset, num_channels, dfmt, nfmt, glc, 1295b8e80941Smrg slc, writeonly_memory); 1296b8e80941Smrg} 1297b8e80941Smrg 1298b8e80941Smrgstatic LLVMValueRef 1299b8e80941Smrgac_build_buffer_load_common(struct ac_llvm_context *ctx, 1300b8e80941Smrg LLVMValueRef rsrc, 1301b8e80941Smrg LLVMValueRef vindex, 1302b8e80941Smrg LLVMValueRef voffset, 1303b8e80941Smrg unsigned num_channels, 1304b8e80941Smrg bool glc, 1305b8e80941Smrg bool slc, 1306b8e80941Smrg bool can_speculate, 1307b8e80941Smrg bool use_format) 1308b8e80941Smrg{ 1309b8e80941Smrg LLVMValueRef args[] = { 1310b8e80941Smrg LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), 1311b8e80941Smrg vindex ? vindex : ctx->i32_0, 1312b8e80941Smrg voffset, 1313b8e80941Smrg LLVMConstInt(ctx->i1, glc, 0), 1314b8e80941Smrg LLVMConstInt(ctx->i1, slc, 0) 1315b8e80941Smrg }; 1316b8e80941Smrg unsigned func = CLAMP(num_channels, 1, 3) - 1; 1317b8e80941Smrg 1318b8e80941Smrg LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32}; 1319b8e80941Smrg const char *type_names[] = {"f32", "v2f32", "v4f32"}; 1320b8e80941Smrg char name[256]; 1321b8e80941Smrg 1322b8e80941Smrg if (use_format) { 1323b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.format.%s", 1324b8e80941Smrg type_names[func]); 1325b8e80941Smrg } else { 1326b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s", 1327b8e80941Smrg type_names[func]); 1328b8e80941Smrg } 1329b8e80941Smrg 1330b8e80941Smrg return ac_build_intrinsic(ctx, name, types[func], args, 1331b8e80941Smrg ARRAY_SIZE(args), 1332b8e80941Smrg ac_get_load_intr_attribs(can_speculate)); 1333b8e80941Smrg} 1334b8e80941Smrg 1335b8e80941Smrgstatic LLVMValueRef 1336b8e80941Smrgac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx, 1337b8e80941Smrg LLVMValueRef rsrc, 1338b8e80941Smrg LLVMValueRef vindex, 1339b8e80941Smrg LLVMValueRef voffset, 1340b8e80941Smrg LLVMValueRef soffset, 1341b8e80941Smrg unsigned num_channels, 1342b8e80941Smrg LLVMTypeRef channel_type, 1343b8e80941Smrg bool glc, 1344b8e80941Smrg bool slc, 1345b8e80941Smrg bool can_speculate, 1346b8e80941Smrg bool use_format, 1347b8e80941Smrg bool structurized) 1348b8e80941Smrg{ 1349b8e80941Smrg LLVMValueRef args[5]; 1350b8e80941Smrg int idx = 0; 1351b8e80941Smrg args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); 1352b8e80941Smrg if (structurized) 1353b8e80941Smrg args[idx++] = vindex ? vindex : ctx->i32_0; 1354b8e80941Smrg args[idx++] = voffset ? voffset : ctx->i32_0; 1355b8e80941Smrg args[idx++] = soffset ? soffset : ctx->i32_0; 1356b8e80941Smrg args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); 1357b8e80941Smrg unsigned func = num_channels == 3 ? 4 : num_channels; 1358b8e80941Smrg const char *indexing_kind = structurized ? "struct" : "raw"; 1359b8e80941Smrg char name[256], type_name[8]; 1360b8e80941Smrg 1361b8e80941Smrg LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type; 1362b8e80941Smrg ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); 1363b8e80941Smrg 1364b8e80941Smrg if (use_format) { 1365b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s", 1366b8e80941Smrg indexing_kind, type_name); 1367b8e80941Smrg } else { 1368b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s", 1369b8e80941Smrg indexing_kind, type_name); 1370b8e80941Smrg } 1371b8e80941Smrg 1372b8e80941Smrg return ac_build_intrinsic(ctx, name, type, args, idx, 1373b8e80941Smrg ac_get_load_intr_attribs(can_speculate)); 1374b8e80941Smrg} 1375b8e80941Smrg 1376b8e80941SmrgLLVMValueRef 1377b8e80941Smrgac_build_buffer_load(struct ac_llvm_context *ctx, 1378b8e80941Smrg LLVMValueRef rsrc, 1379b8e80941Smrg int num_channels, 1380b8e80941Smrg LLVMValueRef vindex, 1381b8e80941Smrg LLVMValueRef voffset, 1382b8e80941Smrg LLVMValueRef soffset, 1383b8e80941Smrg unsigned inst_offset, 1384b8e80941Smrg unsigned glc, 1385b8e80941Smrg unsigned slc, 1386b8e80941Smrg bool can_speculate, 1387b8e80941Smrg bool allow_smem) 1388b8e80941Smrg{ 1389b8e80941Smrg LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0); 1390b8e80941Smrg if (voffset) 1391b8e80941Smrg offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); 1392b8e80941Smrg if (soffset) 1393b8e80941Smrg offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); 1394b8e80941Smrg 1395b8e80941Smrg if (allow_smem && !slc && 1396b8e80941Smrg (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= VI))) { 1397b8e80941Smrg assert(vindex == NULL); 1398b8e80941Smrg 1399b8e80941Smrg LLVMValueRef result[8]; 1400b8e80941Smrg 1401b8e80941Smrg for (int i = 0; i < num_channels; i++) { 1402b8e80941Smrg if (i) { 1403b8e80941Smrg offset = LLVMBuildAdd(ctx->builder, offset, 1404b8e80941Smrg LLVMConstInt(ctx->i32, 4, 0), ""); 1405b8e80941Smrg } 1406b8e80941Smrg const char *intrname = 1407b8e80941Smrg HAVE_LLVM >= 0x0800 ? "llvm.amdgcn.s.buffer.load.f32" 1408b8e80941Smrg : "llvm.SI.load.const.v4i32"; 1409b8e80941Smrg unsigned num_args = HAVE_LLVM >= 0x0800 ? 3 : 2; 1410b8e80941Smrg LLVMValueRef args[3] = { 1411b8e80941Smrg rsrc, 1412b8e80941Smrg offset, 1413b8e80941Smrg glc ? ctx->i32_1 : ctx->i32_0, 1414b8e80941Smrg }; 1415b8e80941Smrg result[i] = ac_build_intrinsic(ctx, intrname, 1416b8e80941Smrg ctx->f32, args, num_args, 1417b8e80941Smrg AC_FUNC_ATTR_READNONE | 1418b8e80941Smrg (HAVE_LLVM < 0x0800 ? AC_FUNC_ATTR_LEGACY : 0)); 1419b8e80941Smrg } 1420b8e80941Smrg if (num_channels == 1) 1421b8e80941Smrg return result[0]; 1422b8e80941Smrg 1423b8e80941Smrg if (num_channels == 3) 1424b8e80941Smrg result[num_channels++] = LLVMGetUndef(ctx->f32); 1425b8e80941Smrg return ac_build_gather_values(ctx, result, num_channels); 1426b8e80941Smrg } 1427b8e80941Smrg 1428b8e80941Smrg if (HAVE_LLVM >= 0x0800) { 1429b8e80941Smrg return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, 1430b8e80941Smrg offset, ctx->i32_0, 1431b8e80941Smrg num_channels, ctx->f32, 1432b8e80941Smrg glc, slc, 1433b8e80941Smrg can_speculate, false, 1434b8e80941Smrg false); 1435b8e80941Smrg } 1436b8e80941Smrg 1437b8e80941Smrg return ac_build_buffer_load_common(ctx, rsrc, vindex, offset, 1438b8e80941Smrg num_channels, glc, slc, 1439b8e80941Smrg can_speculate, false); 1440b8e80941Smrg} 1441b8e80941Smrg 1442b8e80941SmrgLLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, 1443b8e80941Smrg LLVMValueRef rsrc, 1444b8e80941Smrg LLVMValueRef vindex, 1445b8e80941Smrg LLVMValueRef voffset, 1446b8e80941Smrg unsigned num_channels, 1447b8e80941Smrg bool glc, 1448b8e80941Smrg bool can_speculate) 1449b8e80941Smrg{ 1450b8e80941Smrg if (HAVE_LLVM >= 0x800) { 1451b8e80941Smrg return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, 1452b8e80941Smrg num_channels, ctx->f32, 1453b8e80941Smrg glc, false, 1454b8e80941Smrg can_speculate, true, true); 1455b8e80941Smrg } 1456b8e80941Smrg return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, 1457b8e80941Smrg num_channels, glc, false, 1458b8e80941Smrg can_speculate, true); 1459b8e80941Smrg} 1460b8e80941Smrg 1461b8e80941SmrgLLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx, 1462b8e80941Smrg LLVMValueRef rsrc, 1463b8e80941Smrg LLVMValueRef vindex, 1464b8e80941Smrg LLVMValueRef voffset, 1465b8e80941Smrg unsigned num_channels, 1466b8e80941Smrg bool glc, 1467b8e80941Smrg bool can_speculate) 1468b8e80941Smrg{ 1469b8e80941Smrg if (HAVE_LLVM >= 0x800) { 1470b8e80941Smrg return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, 1471b8e80941Smrg num_channels, ctx->f32, 1472b8e80941Smrg glc, false, 1473b8e80941Smrg can_speculate, true, true); 1474b8e80941Smrg } 1475b8e80941Smrg 1476b8e80941Smrg LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), ""); 1477b8e80941Smrg LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, ctx->i32_1, ""); 1478b8e80941Smrg stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), ""); 1479b8e80941Smrg 1480b8e80941Smrg LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder, 1481b8e80941Smrg LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""), 1482b8e80941Smrg elem_count, stride, ""); 1483b8e80941Smrg 1484b8e80941Smrg LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count, 1485b8e80941Smrg LLVMConstInt(ctx->i32, 2, 0), ""); 1486b8e80941Smrg 1487b8e80941Smrg return ac_build_buffer_load_common(ctx, new_rsrc, vindex, voffset, 1488b8e80941Smrg num_channels, glc, false, 1489b8e80941Smrg can_speculate, true); 1490b8e80941Smrg} 1491b8e80941Smrg 1492b8e80941Smrgstatic LLVMValueRef 1493b8e80941Smrgac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx, 1494b8e80941Smrg LLVMValueRef rsrc, 1495b8e80941Smrg LLVMValueRef vindex, 1496b8e80941Smrg LLVMValueRef voffset, 1497b8e80941Smrg LLVMValueRef soffset, 1498b8e80941Smrg unsigned num_channels, 1499b8e80941Smrg unsigned dfmt, 1500b8e80941Smrg unsigned nfmt, 1501b8e80941Smrg bool glc, 1502b8e80941Smrg bool slc, 1503b8e80941Smrg bool can_speculate, 1504b8e80941Smrg bool structurized) 1505b8e80941Smrg{ 1506b8e80941Smrg LLVMValueRef args[6]; 1507b8e80941Smrg int idx = 0; 1508b8e80941Smrg args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); 1509b8e80941Smrg if (structurized) 1510b8e80941Smrg args[idx++] = vindex ? vindex : ctx->i32_0; 1511b8e80941Smrg args[idx++] = voffset ? voffset : ctx->i32_0; 1512b8e80941Smrg args[idx++] = soffset ? soffset : ctx->i32_0; 1513b8e80941Smrg args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); 1514b8e80941Smrg args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); 1515b8e80941Smrg unsigned func = num_channels == 3 ? 4 : num_channels; 1516b8e80941Smrg const char *indexing_kind = structurized ? "struct" : "raw"; 1517b8e80941Smrg char name[256], type_name[8]; 1518b8e80941Smrg 1519b8e80941Smrg LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32; 1520b8e80941Smrg ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); 1521b8e80941Smrg 1522b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s", 1523b8e80941Smrg indexing_kind, type_name); 1524b8e80941Smrg 1525b8e80941Smrg return ac_build_intrinsic(ctx, name, type, args, idx, 1526b8e80941Smrg ac_get_load_intr_attribs(can_speculate)); 1527b8e80941Smrg} 1528b8e80941Smrg 1529b8e80941Smrgstatic LLVMValueRef 1530b8e80941Smrgac_build_tbuffer_load(struct ac_llvm_context *ctx, 1531b8e80941Smrg LLVMValueRef rsrc, 1532b8e80941Smrg LLVMValueRef vindex, 1533b8e80941Smrg LLVMValueRef voffset, 1534b8e80941Smrg LLVMValueRef soffset, 1535b8e80941Smrg LLVMValueRef immoffset, 1536b8e80941Smrg unsigned num_channels, 1537b8e80941Smrg unsigned dfmt, 1538b8e80941Smrg unsigned nfmt, 1539b8e80941Smrg bool glc, 1540b8e80941Smrg bool slc, 1541b8e80941Smrg bool can_speculate, 1542b8e80941Smrg bool structurized) /* only matters for LLVM 8+ */ 1543b8e80941Smrg{ 1544b8e80941Smrg if (HAVE_LLVM >= 0x800) { 1545b8e80941Smrg voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); 1546b8e80941Smrg 1547b8e80941Smrg return ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset, 1548b8e80941Smrg soffset, num_channels, 1549b8e80941Smrg dfmt, nfmt, glc, slc, 1550b8e80941Smrg can_speculate, structurized); 1551b8e80941Smrg } 1552b8e80941Smrg 1553b8e80941Smrg LLVMValueRef args[] = { 1554b8e80941Smrg rsrc, 1555b8e80941Smrg vindex ? vindex : ctx->i32_0, 1556b8e80941Smrg voffset, 1557b8e80941Smrg soffset, 1558b8e80941Smrg immoffset, 1559b8e80941Smrg LLVMConstInt(ctx->i32, dfmt, false), 1560b8e80941Smrg LLVMConstInt(ctx->i32, nfmt, false), 1561b8e80941Smrg LLVMConstInt(ctx->i1, glc, false), 1562b8e80941Smrg LLVMConstInt(ctx->i1, slc, false), 1563b8e80941Smrg }; 1564b8e80941Smrg unsigned func = CLAMP(num_channels, 1, 3) - 1; 1565b8e80941Smrg LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32}; 1566b8e80941Smrg const char *type_names[] = {"i32", "v2i32", "v4i32"}; 1567b8e80941Smrg char name[256]; 1568b8e80941Smrg 1569b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.load.%s", 1570b8e80941Smrg type_names[func]); 1571b8e80941Smrg 1572b8e80941Smrg return ac_build_intrinsic(ctx, name, types[func], args, 9, 1573b8e80941Smrg ac_get_load_intr_attribs(can_speculate)); 1574b8e80941Smrg} 1575b8e80941Smrg 1576b8e80941SmrgLLVMValueRef 1577b8e80941Smrgac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, 1578b8e80941Smrg LLVMValueRef rsrc, 1579b8e80941Smrg LLVMValueRef vindex, 1580b8e80941Smrg LLVMValueRef voffset, 1581b8e80941Smrg LLVMValueRef soffset, 1582b8e80941Smrg LLVMValueRef immoffset, 1583b8e80941Smrg unsigned num_channels, 1584b8e80941Smrg unsigned dfmt, 1585b8e80941Smrg unsigned nfmt, 1586b8e80941Smrg bool glc, 1587b8e80941Smrg bool slc, 1588b8e80941Smrg bool can_speculate) 1589b8e80941Smrg{ 1590b8e80941Smrg return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset, 1591b8e80941Smrg immoffset, num_channels, dfmt, nfmt, glc, 1592b8e80941Smrg slc, can_speculate, true); 1593b8e80941Smrg} 1594b8e80941Smrg 1595b8e80941SmrgLLVMValueRef 1596b8e80941Smrgac_build_raw_tbuffer_load(struct ac_llvm_context *ctx, 1597b8e80941Smrg LLVMValueRef rsrc, 1598b8e80941Smrg LLVMValueRef voffset, 1599b8e80941Smrg LLVMValueRef soffset, 1600b8e80941Smrg LLVMValueRef immoffset, 1601b8e80941Smrg unsigned num_channels, 1602b8e80941Smrg unsigned dfmt, 1603b8e80941Smrg unsigned nfmt, 1604b8e80941Smrg bool glc, 1605b8e80941Smrg bool slc, 1606b8e80941Smrg bool can_speculate) 1607b8e80941Smrg{ 1608b8e80941Smrg return ac_build_tbuffer_load(ctx, rsrc, NULL, voffset, soffset, 1609b8e80941Smrg immoffset, num_channels, dfmt, nfmt, glc, 1610b8e80941Smrg slc, can_speculate, false); 1611b8e80941Smrg} 1612b8e80941Smrg 1613b8e80941SmrgLLVMValueRef 1614b8e80941Smrgac_build_tbuffer_load_short(struct ac_llvm_context *ctx, 1615b8e80941Smrg LLVMValueRef rsrc, 1616b8e80941Smrg LLVMValueRef voffset, 1617b8e80941Smrg LLVMValueRef soffset, 1618b8e80941Smrg LLVMValueRef immoffset, 1619b8e80941Smrg bool glc) 1620b8e80941Smrg{ 1621b8e80941Smrg LLVMValueRef res; 1622b8e80941Smrg 1623b8e80941Smrg if (HAVE_LLVM >= 0x900) { 1624b8e80941Smrg voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); 1625b8e80941Smrg 1626b8e80941Smrg /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ 1627b8e80941Smrg res = ac_build_llvm8_buffer_load_common(ctx, rsrc, NULL, 1628b8e80941Smrg voffset, soffset, 1629b8e80941Smrg 1, ctx->i16, glc, false, 1630b8e80941Smrg false, false, false); 1631b8e80941Smrg } else { 1632b8e80941Smrg unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16; 1633b8e80941Smrg unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; 1634b8e80941Smrg 1635b8e80941Smrg res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset, 1636b8e80941Smrg immoffset, 1, dfmt, nfmt, glc, false, 1637b8e80941Smrg false); 1638b8e80941Smrg 1639b8e80941Smrg res = LLVMBuildTrunc(ctx->builder, res, ctx->i16, ""); 1640b8e80941Smrg } 1641b8e80941Smrg 1642b8e80941Smrg return res; 1643b8e80941Smrg} 1644b8e80941Smrg 1645b8e80941SmrgLLVMValueRef 1646b8e80941Smrgac_build_tbuffer_load_byte(struct ac_llvm_context *ctx, 1647b8e80941Smrg LLVMValueRef rsrc, 1648b8e80941Smrg LLVMValueRef voffset, 1649b8e80941Smrg LLVMValueRef soffset, 1650b8e80941Smrg LLVMValueRef immoffset, 1651b8e80941Smrg bool glc) 1652b8e80941Smrg{ 1653b8e80941Smrg LLVMValueRef res; 1654b8e80941Smrg 1655b8e80941Smrg if (HAVE_LLVM >= 0x900) { 1656b8e80941Smrg voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); 1657b8e80941Smrg 1658b8e80941Smrg /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ 1659b8e80941Smrg res = ac_build_llvm8_buffer_load_common(ctx, rsrc, NULL, 1660b8e80941Smrg voffset, soffset, 1661b8e80941Smrg 1, ctx->i8, glc, false, 1662b8e80941Smrg false, false, false); 1663b8e80941Smrg } else { 1664b8e80941Smrg unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8; 1665b8e80941Smrg unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; 1666b8e80941Smrg 1667b8e80941Smrg res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset, 1668b8e80941Smrg immoffset, 1, dfmt, nfmt, glc, false, 1669b8e80941Smrg false); 1670b8e80941Smrg 1671b8e80941Smrg res = LLVMBuildTrunc(ctx->builder, res, ctx->i8, ""); 1672b8e80941Smrg } 1673b8e80941Smrg 1674b8e80941Smrg return res; 1675b8e80941Smrg} 1676b8e80941Smrgstatic void 1677b8e80941Smrgac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx, 1678b8e80941Smrg LLVMValueRef rsrc, 1679b8e80941Smrg LLVMValueRef vdata, 1680b8e80941Smrg LLVMValueRef vindex, 1681b8e80941Smrg LLVMValueRef voffset, 1682b8e80941Smrg LLVMValueRef soffset, 1683b8e80941Smrg unsigned num_channels, 1684b8e80941Smrg unsigned dfmt, 1685b8e80941Smrg unsigned nfmt, 1686b8e80941Smrg bool glc, 1687b8e80941Smrg bool slc, 1688b8e80941Smrg bool writeonly_memory, 1689b8e80941Smrg bool structurized) 1690b8e80941Smrg{ 1691b8e80941Smrg LLVMValueRef args[7]; 1692b8e80941Smrg int idx = 0; 1693b8e80941Smrg args[idx++] = vdata; 1694b8e80941Smrg args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); 1695b8e80941Smrg if (structurized) 1696b8e80941Smrg args[idx++] = vindex ? vindex : ctx->i32_0; 1697b8e80941Smrg args[idx++] = voffset ? voffset : ctx->i32_0; 1698b8e80941Smrg args[idx++] = soffset ? soffset : ctx->i32_0; 1699b8e80941Smrg args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); 1700b8e80941Smrg args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); 1701b8e80941Smrg unsigned func = num_channels == 3 ? 4 : num_channels; 1702b8e80941Smrg const char *indexing_kind = structurized ? "struct" : "raw"; 1703b8e80941Smrg char name[256], type_name[8]; 1704b8e80941Smrg 1705b8e80941Smrg LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32; 1706b8e80941Smrg ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); 1707b8e80941Smrg 1708b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s", 1709b8e80941Smrg indexing_kind, type_name); 1710b8e80941Smrg 1711b8e80941Smrg ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, 1712b8e80941Smrg ac_get_store_intr_attribs(writeonly_memory)); 1713b8e80941Smrg} 1714b8e80941Smrg 1715b8e80941Smrgstatic void 1716b8e80941Smrgac_build_tbuffer_store(struct ac_llvm_context *ctx, 1717b8e80941Smrg LLVMValueRef rsrc, 1718b8e80941Smrg LLVMValueRef vdata, 1719b8e80941Smrg LLVMValueRef vindex, 1720b8e80941Smrg LLVMValueRef voffset, 1721b8e80941Smrg LLVMValueRef soffset, 1722b8e80941Smrg LLVMValueRef immoffset, 1723b8e80941Smrg unsigned num_channels, 1724b8e80941Smrg unsigned dfmt, 1725b8e80941Smrg unsigned nfmt, 1726b8e80941Smrg bool glc, 1727b8e80941Smrg bool slc, 1728b8e80941Smrg bool writeonly_memory, 1729b8e80941Smrg bool structurized) /* only matters for LLVM 8+ */ 1730b8e80941Smrg{ 1731b8e80941Smrg if (HAVE_LLVM >= 0x800) { 1732b8e80941Smrg voffset = LLVMBuildAdd(ctx->builder, 1733b8e80941Smrg voffset ? voffset : ctx->i32_0, 1734b8e80941Smrg immoffset, ""); 1735b8e80941Smrg 1736b8e80941Smrg ac_build_llvm8_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, 1737b8e80941Smrg soffset, num_channels, dfmt, nfmt, 1738b8e80941Smrg glc, slc, writeonly_memory, 1739b8e80941Smrg structurized); 1740b8e80941Smrg } else { 1741b8e80941Smrg LLVMValueRef params[] = { 1742b8e80941Smrg vdata, 1743b8e80941Smrg rsrc, 1744b8e80941Smrg vindex ? vindex : ctx->i32_0, 1745b8e80941Smrg voffset ? voffset : ctx->i32_0, 1746b8e80941Smrg soffset ? soffset : ctx->i32_0, 1747b8e80941Smrg immoffset, 1748b8e80941Smrg LLVMConstInt(ctx->i32, dfmt, false), 1749b8e80941Smrg LLVMConstInt(ctx->i32, nfmt, false), 1750b8e80941Smrg LLVMConstInt(ctx->i1, glc, false), 1751b8e80941Smrg LLVMConstInt(ctx->i1, slc, false), 1752b8e80941Smrg }; 1753b8e80941Smrg unsigned func = CLAMP(num_channels, 1, 3) - 1; 1754b8e80941Smrg const char *type_names[] = {"i32", "v2i32", "v4i32"}; 1755b8e80941Smrg char name[256]; 1756b8e80941Smrg 1757b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s", 1758b8e80941Smrg type_names[func]); 1759b8e80941Smrg 1760b8e80941Smrg ac_build_intrinsic(ctx, name, ctx->voidt, params, 10, 1761b8e80941Smrg ac_get_store_intr_attribs(writeonly_memory)); 1762b8e80941Smrg } 1763b8e80941Smrg} 1764b8e80941Smrg 1765b8e80941Smrgvoid 1766b8e80941Smrgac_build_struct_tbuffer_store(struct ac_llvm_context *ctx, 1767b8e80941Smrg LLVMValueRef rsrc, 1768b8e80941Smrg LLVMValueRef vdata, 1769b8e80941Smrg LLVMValueRef vindex, 1770b8e80941Smrg LLVMValueRef voffset, 1771b8e80941Smrg LLVMValueRef soffset, 1772b8e80941Smrg LLVMValueRef immoffset, 1773b8e80941Smrg unsigned num_channels, 1774b8e80941Smrg unsigned dfmt, 1775b8e80941Smrg unsigned nfmt, 1776b8e80941Smrg bool glc, 1777b8e80941Smrg bool slc, 1778b8e80941Smrg bool writeonly_memory) 1779b8e80941Smrg{ 1780b8e80941Smrg ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset, 1781b8e80941Smrg immoffset, num_channels, dfmt, nfmt, glc, slc, 1782b8e80941Smrg writeonly_memory, true); 1783b8e80941Smrg} 1784b8e80941Smrg 1785b8e80941Smrgvoid 1786b8e80941Smrgac_build_raw_tbuffer_store(struct ac_llvm_context *ctx, 1787b8e80941Smrg LLVMValueRef rsrc, 1788b8e80941Smrg LLVMValueRef vdata, 1789b8e80941Smrg LLVMValueRef voffset, 1790b8e80941Smrg LLVMValueRef soffset, 1791b8e80941Smrg LLVMValueRef immoffset, 1792b8e80941Smrg unsigned num_channels, 1793b8e80941Smrg unsigned dfmt, 1794b8e80941Smrg unsigned nfmt, 1795b8e80941Smrg bool glc, 1796b8e80941Smrg bool slc, 1797b8e80941Smrg bool writeonly_memory) 1798b8e80941Smrg{ 1799b8e80941Smrg ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset, 1800b8e80941Smrg immoffset, num_channels, dfmt, nfmt, glc, slc, 1801b8e80941Smrg writeonly_memory, false); 1802b8e80941Smrg} 1803b8e80941Smrg 1804b8e80941Smrgvoid 1805b8e80941Smrgac_build_tbuffer_store_short(struct ac_llvm_context *ctx, 1806b8e80941Smrg LLVMValueRef rsrc, 1807b8e80941Smrg LLVMValueRef vdata, 1808b8e80941Smrg LLVMValueRef voffset, 1809b8e80941Smrg LLVMValueRef soffset, 1810b8e80941Smrg bool glc, 1811b8e80941Smrg bool writeonly_memory) 1812b8e80941Smrg{ 1813b8e80941Smrg vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, ""); 1814b8e80941Smrg 1815b8e80941Smrg if (HAVE_LLVM >= 0x900) { 1816b8e80941Smrg /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ 1817b8e80941Smrg ac_build_llvm8_buffer_store_common(ctx, rsrc, vdata, NULL, 1818b8e80941Smrg voffset, soffset, 1, 1819b8e80941Smrg ctx->i16, glc, false, 1820b8e80941Smrg writeonly_memory, false, 1821b8e80941Smrg false); 1822b8e80941Smrg } else { 1823b8e80941Smrg unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16; 1824b8e80941Smrg unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; 1825b8e80941Smrg 1826b8e80941Smrg vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, ""); 1827b8e80941Smrg 1828b8e80941Smrg ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, 1829b8e80941Smrg ctx->i32_0, 1, dfmt, nfmt, glc, false, 1830b8e80941Smrg writeonly_memory); 1831b8e80941Smrg } 1832b8e80941Smrg} 1833b8e80941Smrg 1834b8e80941Smrgvoid 1835b8e80941Smrgac_build_tbuffer_store_byte(struct ac_llvm_context *ctx, 1836b8e80941Smrg LLVMValueRef rsrc, 1837b8e80941Smrg LLVMValueRef vdata, 1838b8e80941Smrg LLVMValueRef voffset, 1839b8e80941Smrg LLVMValueRef soffset, 1840b8e80941Smrg bool glc, 1841b8e80941Smrg bool writeonly_memory) 1842b8e80941Smrg{ 1843b8e80941Smrg vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, ""); 1844b8e80941Smrg 1845b8e80941Smrg if (HAVE_LLVM >= 0x900) { 1846b8e80941Smrg /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ 1847b8e80941Smrg ac_build_llvm8_buffer_store_common(ctx, rsrc, vdata, NULL, 1848b8e80941Smrg voffset, soffset, 1, 1849b8e80941Smrg ctx->i8, glc, false, 1850b8e80941Smrg writeonly_memory, false, 1851b8e80941Smrg false); 1852b8e80941Smrg } else { 1853b8e80941Smrg unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8; 1854b8e80941Smrg unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; 1855b8e80941Smrg 1856b8e80941Smrg vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, ""); 1857b8e80941Smrg 1858b8e80941Smrg ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, 1859b8e80941Smrg ctx->i32_0, 1, dfmt, nfmt, glc, false, 1860b8e80941Smrg writeonly_memory); 1861b8e80941Smrg } 1862b8e80941Smrg} 1863b8e80941Smrg/** 1864b8e80941Smrg * Set range metadata on an instruction. This can only be used on load and 1865b8e80941Smrg * call instructions. If you know an instruction can only produce the values 1866b8e80941Smrg * 0, 1, 2, you would do set_range_metadata(value, 0, 3); 1867b8e80941Smrg * \p lo is the minimum value inclusive. 1868b8e80941Smrg * \p hi is the maximum value exclusive. 1869b8e80941Smrg */ 1870b8e80941Smrgstatic void set_range_metadata(struct ac_llvm_context *ctx, 1871b8e80941Smrg LLVMValueRef value, unsigned lo, unsigned hi) 1872b8e80941Smrg{ 1873b8e80941Smrg LLVMValueRef range_md, md_args[2]; 1874b8e80941Smrg LLVMTypeRef type = LLVMTypeOf(value); 1875b8e80941Smrg LLVMContextRef context = LLVMGetTypeContext(type); 1876b8e80941Smrg 1877b8e80941Smrg md_args[0] = LLVMConstInt(type, lo, false); 1878b8e80941Smrg md_args[1] = LLVMConstInt(type, hi, false); 1879b8e80941Smrg range_md = LLVMMDNodeInContext(context, md_args, 2); 1880b8e80941Smrg LLVMSetMetadata(value, ctx->range_md_kind, range_md); 1881b8e80941Smrg} 1882b8e80941Smrg 1883b8e80941SmrgLLVMValueRef 1884b8e80941Smrgac_get_thread_id(struct ac_llvm_context *ctx) 1885b8e80941Smrg{ 1886b8e80941Smrg LLVMValueRef tid; 1887b8e80941Smrg 1888b8e80941Smrg LLVMValueRef tid_args[2]; 1889b8e80941Smrg tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false); 1890b8e80941Smrg tid_args[1] = ctx->i32_0; 1891b8e80941Smrg tid_args[1] = ac_build_intrinsic(ctx, 1892b8e80941Smrg "llvm.amdgcn.mbcnt.lo", ctx->i32, 1893b8e80941Smrg tid_args, 2, AC_FUNC_ATTR_READNONE); 1894b8e80941Smrg 1895b8e80941Smrg tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", 1896b8e80941Smrg ctx->i32, tid_args, 1897b8e80941Smrg 2, AC_FUNC_ATTR_READNONE); 1898b8e80941Smrg set_range_metadata(ctx, tid, 0, 64); 1899b8e80941Smrg return tid; 1900b8e80941Smrg} 1901b8e80941Smrg 1902b8e80941Smrg/* 1903b8e80941Smrg * SI implements derivatives using the local data store (LDS) 1904b8e80941Smrg * All writes to the LDS happen in all executing threads at 1905b8e80941Smrg * the same time. TID is the Thread ID for the current 1906b8e80941Smrg * thread and is a value between 0 and 63, representing 1907b8e80941Smrg * the thread's position in the wavefront. 1908b8e80941Smrg * 1909b8e80941Smrg * For the pixel shader threads are grouped into quads of four pixels. 1910b8e80941Smrg * The TIDs of the pixels of a quad are: 1911b8e80941Smrg * 1912b8e80941Smrg * +------+------+ 1913b8e80941Smrg * |4n + 0|4n + 1| 1914b8e80941Smrg * +------+------+ 1915b8e80941Smrg * |4n + 2|4n + 3| 1916b8e80941Smrg * +------+------+ 1917b8e80941Smrg * 1918b8e80941Smrg * So, masking the TID with 0xfffffffc yields the TID of the top left pixel 1919b8e80941Smrg * of the quad, masking with 0xfffffffd yields the TID of the top pixel of 1920b8e80941Smrg * the current pixel's column, and masking with 0xfffffffe yields the TID 1921b8e80941Smrg * of the left pixel of the current pixel's row. 1922b8e80941Smrg * 1923b8e80941Smrg * Adding 1 yields the TID of the pixel to the right of the left pixel, and 1924b8e80941Smrg * adding 2 yields the TID of the pixel below the top pixel. 1925b8e80941Smrg */ 1926b8e80941SmrgLLVMValueRef 1927b8e80941Smrgac_build_ddxy(struct ac_llvm_context *ctx, 1928b8e80941Smrg uint32_t mask, 1929b8e80941Smrg int idx, 1930b8e80941Smrg LLVMValueRef val) 1931b8e80941Smrg{ 1932b8e80941Smrg unsigned tl_lanes[4], trbl_lanes[4]; 1933b8e80941Smrg char name[32], type[8]; 1934b8e80941Smrg LLVMValueRef tl, trbl; 1935b8e80941Smrg LLVMTypeRef result_type; 1936b8e80941Smrg LLVMValueRef result; 1937b8e80941Smrg 1938b8e80941Smrg result_type = ac_to_float_type(ctx, LLVMTypeOf(val)); 1939b8e80941Smrg 1940b8e80941Smrg if (result_type == ctx->f16) 1941b8e80941Smrg val = LLVMBuildZExt(ctx->builder, val, ctx->i32, ""); 1942b8e80941Smrg 1943b8e80941Smrg for (unsigned i = 0; i < 4; ++i) { 1944b8e80941Smrg tl_lanes[i] = i & mask; 1945b8e80941Smrg trbl_lanes[i] = (i & mask) + idx; 1946b8e80941Smrg } 1947b8e80941Smrg 1948b8e80941Smrg tl = ac_build_quad_swizzle(ctx, val, 1949b8e80941Smrg tl_lanes[0], tl_lanes[1], 1950b8e80941Smrg tl_lanes[2], tl_lanes[3]); 1951b8e80941Smrg trbl = ac_build_quad_swizzle(ctx, val, 1952b8e80941Smrg trbl_lanes[0], trbl_lanes[1], 1953b8e80941Smrg trbl_lanes[2], trbl_lanes[3]); 1954b8e80941Smrg 1955b8e80941Smrg if (result_type == ctx->f16) { 1956b8e80941Smrg tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, ""); 1957b8e80941Smrg trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, ""); 1958b8e80941Smrg } 1959b8e80941Smrg 1960b8e80941Smrg tl = LLVMBuildBitCast(ctx->builder, tl, result_type, ""); 1961b8e80941Smrg trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, ""); 1962b8e80941Smrg result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); 1963b8e80941Smrg 1964b8e80941Smrg ac_build_type_name_for_intr(result_type, type, sizeof(type)); 1965b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type); 1966b8e80941Smrg 1967b8e80941Smrg return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0); 1968b8e80941Smrg} 1969b8e80941Smrg 1970b8e80941Smrgvoid 1971b8e80941Smrgac_build_sendmsg(struct ac_llvm_context *ctx, 1972b8e80941Smrg uint32_t msg, 1973b8e80941Smrg LLVMValueRef wave_id) 1974b8e80941Smrg{ 1975b8e80941Smrg LLVMValueRef args[2]; 1976b8e80941Smrg args[0] = LLVMConstInt(ctx->i32, msg, false); 1977b8e80941Smrg args[1] = wave_id; 1978b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0); 1979b8e80941Smrg} 1980b8e80941Smrg 1981b8e80941SmrgLLVMValueRef 1982b8e80941Smrgac_build_imsb(struct ac_llvm_context *ctx, 1983b8e80941Smrg LLVMValueRef arg, 1984b8e80941Smrg LLVMTypeRef dst_type) 1985b8e80941Smrg{ 1986b8e80941Smrg LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32", 1987b8e80941Smrg dst_type, &arg, 1, 1988b8e80941Smrg AC_FUNC_ATTR_READNONE); 1989b8e80941Smrg 1990b8e80941Smrg /* The HW returns the last bit index from MSB, but NIR/TGSI wants 1991b8e80941Smrg * the index from LSB. Invert it by doing "31 - msb". */ 1992b8e80941Smrg msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), 1993b8e80941Smrg msb, ""); 1994b8e80941Smrg 1995b8e80941Smrg LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true); 1996b8e80941Smrg LLVMValueRef cond = LLVMBuildOr(ctx->builder, 1997b8e80941Smrg LLVMBuildICmp(ctx->builder, LLVMIntEQ, 1998b8e80941Smrg arg, ctx->i32_0, ""), 1999b8e80941Smrg LLVMBuildICmp(ctx->builder, LLVMIntEQ, 2000b8e80941Smrg arg, all_ones, ""), ""); 2001b8e80941Smrg 2002b8e80941Smrg return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, ""); 2003b8e80941Smrg} 2004b8e80941Smrg 2005b8e80941SmrgLLVMValueRef 2006b8e80941Smrgac_build_umsb(struct ac_llvm_context *ctx, 2007b8e80941Smrg LLVMValueRef arg, 2008b8e80941Smrg LLVMTypeRef dst_type) 2009b8e80941Smrg{ 2010b8e80941Smrg const char *intrin_name; 2011b8e80941Smrg LLVMTypeRef type; 2012b8e80941Smrg LLVMValueRef highest_bit; 2013b8e80941Smrg LLVMValueRef zero; 2014b8e80941Smrg unsigned bitsize; 2015b8e80941Smrg 2016b8e80941Smrg bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg)); 2017b8e80941Smrg switch (bitsize) { 2018b8e80941Smrg case 64: 2019b8e80941Smrg intrin_name = "llvm.ctlz.i64"; 2020b8e80941Smrg type = ctx->i64; 2021b8e80941Smrg highest_bit = LLVMConstInt(ctx->i64, 63, false); 2022b8e80941Smrg zero = ctx->i64_0; 2023b8e80941Smrg break; 2024b8e80941Smrg case 32: 2025b8e80941Smrg intrin_name = "llvm.ctlz.i32"; 2026b8e80941Smrg type = ctx->i32; 2027b8e80941Smrg highest_bit = LLVMConstInt(ctx->i32, 31, false); 2028b8e80941Smrg zero = ctx->i32_0; 2029b8e80941Smrg break; 2030b8e80941Smrg case 16: 2031b8e80941Smrg intrin_name = "llvm.ctlz.i16"; 2032b8e80941Smrg type = ctx->i16; 2033b8e80941Smrg highest_bit = LLVMConstInt(ctx->i16, 15, false); 2034b8e80941Smrg zero = ctx->i16_0; 2035b8e80941Smrg break; 2036b8e80941Smrg case 8: 2037b8e80941Smrg intrin_name = "llvm.ctlz.i8"; 2038b8e80941Smrg type = ctx->i8; 2039b8e80941Smrg highest_bit = LLVMConstInt(ctx->i8, 7, false); 2040b8e80941Smrg zero = ctx->i8_0; 2041b8e80941Smrg break; 2042b8e80941Smrg default: 2043b8e80941Smrg unreachable(!"invalid bitsize"); 2044b8e80941Smrg break; 2045b8e80941Smrg } 2046b8e80941Smrg 2047b8e80941Smrg LLVMValueRef params[2] = { 2048b8e80941Smrg arg, 2049b8e80941Smrg ctx->i1true, 2050b8e80941Smrg }; 2051b8e80941Smrg 2052b8e80941Smrg LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type, 2053b8e80941Smrg params, 2, 2054b8e80941Smrg AC_FUNC_ATTR_READNONE); 2055b8e80941Smrg 2056b8e80941Smrg /* The HW returns the last bit index from MSB, but TGSI/NIR wants 2057b8e80941Smrg * the index from LSB. Invert it by doing "31 - msb". */ 2058b8e80941Smrg msb = LLVMBuildSub(ctx->builder, highest_bit, msb, ""); 2059b8e80941Smrg 2060b8e80941Smrg if (bitsize == 64) { 2061b8e80941Smrg msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, ""); 2062b8e80941Smrg } else if (bitsize < 32) { 2063b8e80941Smrg msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, ""); 2064b8e80941Smrg } 2065b8e80941Smrg 2066b8e80941Smrg /* check for zero */ 2067b8e80941Smrg return LLVMBuildSelect(ctx->builder, 2068b8e80941Smrg LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""), 2069b8e80941Smrg LLVMConstInt(ctx->i32, -1, true), msb, ""); 2070b8e80941Smrg} 2071b8e80941Smrg 2072b8e80941SmrgLLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, 2073b8e80941Smrg LLVMValueRef b) 2074b8e80941Smrg{ 2075b8e80941Smrg char name[64]; 2076b8e80941Smrg snprintf(name, sizeof(name), "llvm.minnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a))); 2077b8e80941Smrg LLVMValueRef args[2] = {a, b}; 2078b8e80941Smrg return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, 2079b8e80941Smrg AC_FUNC_ATTR_READNONE); 2080b8e80941Smrg} 2081b8e80941Smrg 2082b8e80941SmrgLLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, 2083b8e80941Smrg LLVMValueRef b) 2084b8e80941Smrg{ 2085b8e80941Smrg char name[64]; 2086b8e80941Smrg snprintf(name, sizeof(name), "llvm.maxnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a))); 2087b8e80941Smrg LLVMValueRef args[2] = {a, b}; 2088b8e80941Smrg return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, 2089b8e80941Smrg AC_FUNC_ATTR_READNONE); 2090b8e80941Smrg} 2091b8e80941Smrg 2092b8e80941SmrgLLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, 2093b8e80941Smrg LLVMValueRef b) 2094b8e80941Smrg{ 2095b8e80941Smrg LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, ""); 2096b8e80941Smrg return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); 2097b8e80941Smrg} 2098b8e80941Smrg 2099b8e80941SmrgLLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, 2100b8e80941Smrg LLVMValueRef b) 2101b8e80941Smrg{ 2102b8e80941Smrg LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, ""); 2103b8e80941Smrg return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); 2104b8e80941Smrg} 2105b8e80941Smrg 2106b8e80941SmrgLLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, 2107b8e80941Smrg LLVMValueRef b) 2108b8e80941Smrg{ 2109b8e80941Smrg LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, ""); 2110b8e80941Smrg return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); 2111b8e80941Smrg} 2112b8e80941Smrg 2113b8e80941SmrgLLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, 2114b8e80941Smrg LLVMValueRef b) 2115b8e80941Smrg{ 2116b8e80941Smrg LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, ""); 2117b8e80941Smrg return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); 2118b8e80941Smrg} 2119b8e80941Smrg 2120b8e80941SmrgLLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value) 2121b8e80941Smrg{ 2122b8e80941Smrg LLVMTypeRef t = LLVMTypeOf(value); 2123b8e80941Smrg return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)), 2124b8e80941Smrg LLVMConstReal(t, 1.0)); 2125b8e80941Smrg} 2126b8e80941Smrg 2127b8e80941Smrgvoid ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a) 2128b8e80941Smrg{ 2129b8e80941Smrg LLVMValueRef args[9]; 2130b8e80941Smrg 2131b8e80941Smrg args[0] = LLVMConstInt(ctx->i32, a->target, 0); 2132b8e80941Smrg args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0); 2133b8e80941Smrg 2134b8e80941Smrg if (a->compr) { 2135b8e80941Smrg LLVMTypeRef i16 = LLVMInt16TypeInContext(ctx->context); 2136b8e80941Smrg LLVMTypeRef v2i16 = LLVMVectorType(i16, 2); 2137b8e80941Smrg 2138b8e80941Smrg args[2] = LLVMBuildBitCast(ctx->builder, a->out[0], 2139b8e80941Smrg v2i16, ""); 2140b8e80941Smrg args[3] = LLVMBuildBitCast(ctx->builder, a->out[1], 2141b8e80941Smrg v2i16, ""); 2142b8e80941Smrg args[4] = LLVMConstInt(ctx->i1, a->done, 0); 2143b8e80941Smrg args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0); 2144b8e80941Smrg 2145b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16", 2146b8e80941Smrg ctx->voidt, args, 6, 0); 2147b8e80941Smrg } else { 2148b8e80941Smrg args[2] = a->out[0]; 2149b8e80941Smrg args[3] = a->out[1]; 2150b8e80941Smrg args[4] = a->out[2]; 2151b8e80941Smrg args[5] = a->out[3]; 2152b8e80941Smrg args[6] = LLVMConstInt(ctx->i1, a->done, 0); 2153b8e80941Smrg args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0); 2154b8e80941Smrg 2155b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", 2156b8e80941Smrg ctx->voidt, args, 8, 0); 2157b8e80941Smrg } 2158b8e80941Smrg} 2159b8e80941Smrg 2160b8e80941Smrgvoid ac_build_export_null(struct ac_llvm_context *ctx) 2161b8e80941Smrg{ 2162b8e80941Smrg struct ac_export_args args; 2163b8e80941Smrg 2164b8e80941Smrg args.enabled_channels = 0x0; /* enabled channels */ 2165b8e80941Smrg args.valid_mask = 1; /* whether the EXEC mask is valid */ 2166b8e80941Smrg args.done = 1; /* DONE bit */ 2167b8e80941Smrg args.target = V_008DFC_SQ_EXP_NULL; 2168b8e80941Smrg args.compr = 0; /* COMPR flag (0 = 32-bit export) */ 2169b8e80941Smrg args.out[0] = LLVMGetUndef(ctx->f32); /* R */ 2170b8e80941Smrg args.out[1] = LLVMGetUndef(ctx->f32); /* G */ 2171b8e80941Smrg args.out[2] = LLVMGetUndef(ctx->f32); /* B */ 2172b8e80941Smrg args.out[3] = LLVMGetUndef(ctx->f32); /* A */ 2173b8e80941Smrg 2174b8e80941Smrg ac_build_export(ctx, &args); 2175b8e80941Smrg} 2176b8e80941Smrg 2177b8e80941Smrgstatic unsigned ac_num_coords(enum ac_image_dim dim) 2178b8e80941Smrg{ 2179b8e80941Smrg switch (dim) { 2180b8e80941Smrg case ac_image_1d: 2181b8e80941Smrg return 1; 2182b8e80941Smrg case ac_image_2d: 2183b8e80941Smrg case ac_image_1darray: 2184b8e80941Smrg return 2; 2185b8e80941Smrg case ac_image_3d: 2186b8e80941Smrg case ac_image_cube: 2187b8e80941Smrg case ac_image_2darray: 2188b8e80941Smrg case ac_image_2dmsaa: 2189b8e80941Smrg return 3; 2190b8e80941Smrg case ac_image_2darraymsaa: 2191b8e80941Smrg return 4; 2192b8e80941Smrg default: 2193b8e80941Smrg unreachable("ac_num_coords: bad dim"); 2194b8e80941Smrg } 2195b8e80941Smrg} 2196b8e80941Smrg 2197b8e80941Smrgstatic unsigned ac_num_derivs(enum ac_image_dim dim) 2198b8e80941Smrg{ 2199b8e80941Smrg switch (dim) { 2200b8e80941Smrg case ac_image_1d: 2201b8e80941Smrg case ac_image_1darray: 2202b8e80941Smrg return 2; 2203b8e80941Smrg case ac_image_2d: 2204b8e80941Smrg case ac_image_2darray: 2205b8e80941Smrg case ac_image_cube: 2206b8e80941Smrg return 4; 2207b8e80941Smrg case ac_image_3d: 2208b8e80941Smrg return 6; 2209b8e80941Smrg case ac_image_2dmsaa: 2210b8e80941Smrg case ac_image_2darraymsaa: 2211b8e80941Smrg default: 2212b8e80941Smrg unreachable("derivatives not supported"); 2213b8e80941Smrg } 2214b8e80941Smrg} 2215b8e80941Smrg 2216b8e80941Smrgstatic const char *get_atomic_name(enum ac_atomic_op op) 2217b8e80941Smrg{ 2218b8e80941Smrg switch (op) { 2219b8e80941Smrg case ac_atomic_swap: return "swap"; 2220b8e80941Smrg case ac_atomic_add: return "add"; 2221b8e80941Smrg case ac_atomic_sub: return "sub"; 2222b8e80941Smrg case ac_atomic_smin: return "smin"; 2223b8e80941Smrg case ac_atomic_umin: return "umin"; 2224b8e80941Smrg case ac_atomic_smax: return "smax"; 2225b8e80941Smrg case ac_atomic_umax: return "umax"; 2226b8e80941Smrg case ac_atomic_and: return "and"; 2227b8e80941Smrg case ac_atomic_or: return "or"; 2228b8e80941Smrg case ac_atomic_xor: return "xor"; 2229b8e80941Smrg } 2230b8e80941Smrg unreachable("bad atomic op"); 2231b8e80941Smrg} 2232b8e80941Smrg 2233b8e80941SmrgLLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, 2234b8e80941Smrg struct ac_image_args *a) 2235b8e80941Smrg{ 2236b8e80941Smrg const char *overload[3] = { "", "", "" }; 2237b8e80941Smrg unsigned num_overloads = 0; 2238b8e80941Smrg LLVMValueRef args[18]; 2239b8e80941Smrg unsigned num_args = 0; 2240b8e80941Smrg enum ac_image_dim dim = a->dim; 2241b8e80941Smrg 2242b8e80941Smrg assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 || 2243b8e80941Smrg !a->level_zero); 2244b8e80941Smrg assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip && 2245b8e80941Smrg a->opcode != ac_image_store_mip) || 2246b8e80941Smrg a->lod); 2247b8e80941Smrg assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || 2248b8e80941Smrg (!a->compare && !a->offset)); 2249b8e80941Smrg assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || 2250b8e80941Smrg a->opcode == ac_image_get_lod) || 2251b8e80941Smrg !a->bias); 2252b8e80941Smrg assert((a->bias ? 1 : 0) + 2253b8e80941Smrg (a->lod ? 1 : 0) + 2254b8e80941Smrg (a->level_zero ? 1 : 0) + 2255b8e80941Smrg (a->derivs[0] ? 1 : 0) <= 1); 2256b8e80941Smrg 2257b8e80941Smrg if (a->opcode == ac_image_get_lod) { 2258b8e80941Smrg switch (dim) { 2259b8e80941Smrg case ac_image_1darray: 2260b8e80941Smrg dim = ac_image_1d; 2261b8e80941Smrg break; 2262b8e80941Smrg case ac_image_2darray: 2263b8e80941Smrg case ac_image_cube: 2264b8e80941Smrg dim = ac_image_2d; 2265b8e80941Smrg break; 2266b8e80941Smrg default: 2267b8e80941Smrg break; 2268b8e80941Smrg } 2269b8e80941Smrg } 2270b8e80941Smrg 2271b8e80941Smrg bool sample = a->opcode == ac_image_sample || 2272b8e80941Smrg a->opcode == ac_image_gather4 || 2273b8e80941Smrg a->opcode == ac_image_get_lod; 2274b8e80941Smrg bool atomic = a->opcode == ac_image_atomic || 2275b8e80941Smrg a->opcode == ac_image_atomic_cmpswap; 2276b8e80941Smrg LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32; 2277b8e80941Smrg 2278b8e80941Smrg if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) { 2279b8e80941Smrg args[num_args++] = a->data[0]; 2280b8e80941Smrg if (a->opcode == ac_image_atomic_cmpswap) 2281b8e80941Smrg args[num_args++] = a->data[1]; 2282b8e80941Smrg } 2283b8e80941Smrg 2284b8e80941Smrg if (!atomic) 2285b8e80941Smrg args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, false); 2286b8e80941Smrg 2287b8e80941Smrg if (a->offset) 2288b8e80941Smrg args[num_args++] = ac_to_integer(ctx, a->offset); 2289b8e80941Smrg if (a->bias) { 2290b8e80941Smrg args[num_args++] = ac_to_float(ctx, a->bias); 2291b8e80941Smrg overload[num_overloads++] = ".f32"; 2292b8e80941Smrg } 2293b8e80941Smrg if (a->compare) 2294b8e80941Smrg args[num_args++] = ac_to_float(ctx, a->compare); 2295b8e80941Smrg if (a->derivs[0]) { 2296b8e80941Smrg unsigned count = ac_num_derivs(dim); 2297b8e80941Smrg for (unsigned i = 0; i < count; ++i) 2298b8e80941Smrg args[num_args++] = ac_to_float(ctx, a->derivs[i]); 2299b8e80941Smrg overload[num_overloads++] = ".f32"; 2300b8e80941Smrg } 2301b8e80941Smrg unsigned num_coords = 2302b8e80941Smrg a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0; 2303b8e80941Smrg for (unsigned i = 0; i < num_coords; ++i) 2304b8e80941Smrg args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, ""); 2305b8e80941Smrg if (a->lod) 2306b8e80941Smrg args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, ""); 2307b8e80941Smrg overload[num_overloads++] = sample ? ".f32" : ".i32"; 2308b8e80941Smrg 2309b8e80941Smrg args[num_args++] = a->resource; 2310b8e80941Smrg if (sample) { 2311b8e80941Smrg args[num_args++] = a->sampler; 2312b8e80941Smrg args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false); 2313b8e80941Smrg } 2314b8e80941Smrg 2315b8e80941Smrg args[num_args++] = ctx->i32_0; /* texfailctrl */ 2316b8e80941Smrg args[num_args++] = LLVMConstInt(ctx->i32, a->cache_policy, false); 2317b8e80941Smrg 2318b8e80941Smrg const char *name; 2319b8e80941Smrg const char *atomic_subop = ""; 2320b8e80941Smrg switch (a->opcode) { 2321b8e80941Smrg case ac_image_sample: name = "sample"; break; 2322b8e80941Smrg case ac_image_gather4: name = "gather4"; break; 2323b8e80941Smrg case ac_image_load: name = "load"; break; 2324b8e80941Smrg case ac_image_load_mip: name = "load.mip"; break; 2325b8e80941Smrg case ac_image_store: name = "store"; break; 2326b8e80941Smrg case ac_image_store_mip: name = "store.mip"; break; 2327b8e80941Smrg case ac_image_atomic: 2328b8e80941Smrg name = "atomic."; 2329b8e80941Smrg atomic_subop = get_atomic_name(a->atomic); 2330b8e80941Smrg break; 2331b8e80941Smrg case ac_image_atomic_cmpswap: 2332b8e80941Smrg name = "atomic."; 2333b8e80941Smrg atomic_subop = "cmpswap"; 2334b8e80941Smrg break; 2335b8e80941Smrg case ac_image_get_lod: name = "getlod"; break; 2336b8e80941Smrg case ac_image_get_resinfo: name = "getresinfo"; break; 2337b8e80941Smrg default: unreachable("invalid image opcode"); 2338b8e80941Smrg } 2339b8e80941Smrg 2340b8e80941Smrg const char *dimname; 2341b8e80941Smrg switch (dim) { 2342b8e80941Smrg case ac_image_1d: dimname = "1d"; break; 2343b8e80941Smrg case ac_image_2d: dimname = "2d"; break; 2344b8e80941Smrg case ac_image_3d: dimname = "3d"; break; 2345b8e80941Smrg case ac_image_cube: dimname = "cube"; break; 2346b8e80941Smrg case ac_image_1darray: dimname = "1darray"; break; 2347b8e80941Smrg case ac_image_2darray: dimname = "2darray"; break; 2348b8e80941Smrg case ac_image_2dmsaa: dimname = "2dmsaa"; break; 2349b8e80941Smrg case ac_image_2darraymsaa: dimname = "2darraymsaa"; break; 2350b8e80941Smrg default: unreachable("invalid dim"); 2351b8e80941Smrg } 2352b8e80941Smrg 2353b8e80941Smrg bool lod_suffix = 2354b8e80941Smrg a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4); 2355b8e80941Smrg char intr_name[96]; 2356b8e80941Smrg snprintf(intr_name, sizeof(intr_name), 2357b8e80941Smrg "llvm.amdgcn.image.%s%s" /* base name */ 2358b8e80941Smrg "%s%s%s" /* sample/gather modifiers */ 2359b8e80941Smrg ".%s.%s%s%s%s", /* dimension and type overloads */ 2360b8e80941Smrg name, atomic_subop, 2361b8e80941Smrg a->compare ? ".c" : "", 2362b8e80941Smrg a->bias ? ".b" : 2363b8e80941Smrg lod_suffix ? ".l" : 2364b8e80941Smrg a->derivs[0] ? ".d" : 2365b8e80941Smrg a->level_zero ? ".lz" : "", 2366b8e80941Smrg a->offset ? ".o" : "", 2367b8e80941Smrg dimname, 2368b8e80941Smrg atomic ? "i32" : "v4f32", 2369b8e80941Smrg overload[0], overload[1], overload[2]); 2370b8e80941Smrg 2371b8e80941Smrg LLVMTypeRef retty; 2372b8e80941Smrg if (atomic) 2373b8e80941Smrg retty = ctx->i32; 2374b8e80941Smrg else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip) 2375b8e80941Smrg retty = ctx->voidt; 2376b8e80941Smrg else 2377b8e80941Smrg retty = ctx->v4f32; 2378b8e80941Smrg 2379b8e80941Smrg LLVMValueRef result = 2380b8e80941Smrg ac_build_intrinsic(ctx, intr_name, retty, args, num_args, 2381b8e80941Smrg a->attributes); 2382b8e80941Smrg if (!sample && retty == ctx->v4f32) { 2383b8e80941Smrg result = LLVMBuildBitCast(ctx->builder, result, 2384b8e80941Smrg ctx->v4i32, ""); 2385b8e80941Smrg } 2386b8e80941Smrg return result; 2387b8e80941Smrg} 2388b8e80941Smrg 2389b8e80941SmrgLLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, 2390b8e80941Smrg LLVMValueRef args[2]) 2391b8e80941Smrg{ 2392b8e80941Smrg LLVMTypeRef v2f16 = 2393b8e80941Smrg LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2); 2394b8e80941Smrg 2395b8e80941Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", v2f16, 2396b8e80941Smrg args, 2, AC_FUNC_ATTR_READNONE); 2397b8e80941Smrg} 2398b8e80941Smrg 2399b8e80941SmrgLLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, 2400b8e80941Smrg LLVMValueRef args[2]) 2401b8e80941Smrg{ 2402b8e80941Smrg LLVMValueRef res = 2403b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", 2404b8e80941Smrg ctx->v2i16, args, 2, 2405b8e80941Smrg AC_FUNC_ATTR_READNONE); 2406b8e80941Smrg return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); 2407b8e80941Smrg} 2408b8e80941Smrg 2409b8e80941SmrgLLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, 2410b8e80941Smrg LLVMValueRef args[2]) 2411b8e80941Smrg{ 2412b8e80941Smrg LLVMValueRef res = 2413b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", 2414b8e80941Smrg ctx->v2i16, args, 2, 2415b8e80941Smrg AC_FUNC_ATTR_READNONE); 2416b8e80941Smrg return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); 2417b8e80941Smrg} 2418b8e80941Smrg 2419b8e80941Smrg/* The 8-bit and 10-bit clamping is for HW workarounds. */ 2420b8e80941SmrgLLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, 2421b8e80941Smrg LLVMValueRef args[2], unsigned bits, bool hi) 2422b8e80941Smrg{ 2423b8e80941Smrg assert(bits == 8 || bits == 10 || bits == 16); 2424b8e80941Smrg 2425b8e80941Smrg LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, 2426b8e80941Smrg bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0); 2427b8e80941Smrg LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, 2428b8e80941Smrg bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0); 2429b8e80941Smrg LLVMValueRef max_alpha = 2430b8e80941Smrg bits != 10 ? max_rgb : ctx->i32_1; 2431b8e80941Smrg LLVMValueRef min_alpha = 2432b8e80941Smrg bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0); 2433b8e80941Smrg 2434b8e80941Smrg /* Clamp. */ 2435b8e80941Smrg if (bits != 16) { 2436b8e80941Smrg for (int i = 0; i < 2; i++) { 2437b8e80941Smrg bool alpha = hi && i == 1; 2438b8e80941Smrg args[i] = ac_build_imin(ctx, args[i], 2439b8e80941Smrg alpha ? max_alpha : max_rgb); 2440b8e80941Smrg args[i] = ac_build_imax(ctx, args[i], 2441b8e80941Smrg alpha ? min_alpha : min_rgb); 2442b8e80941Smrg } 2443b8e80941Smrg } 2444b8e80941Smrg 2445b8e80941Smrg LLVMValueRef res = 2446b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", 2447b8e80941Smrg ctx->v2i16, args, 2, 2448b8e80941Smrg AC_FUNC_ATTR_READNONE); 2449b8e80941Smrg return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); 2450b8e80941Smrg} 2451b8e80941Smrg 2452b8e80941Smrg/* The 8-bit and 10-bit clamping is for HW workarounds. */ 2453b8e80941SmrgLLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, 2454b8e80941Smrg LLVMValueRef args[2], unsigned bits, bool hi) 2455b8e80941Smrg{ 2456b8e80941Smrg assert(bits == 8 || bits == 10 || bits == 16); 2457b8e80941Smrg 2458b8e80941Smrg LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, 2459b8e80941Smrg bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0); 2460b8e80941Smrg LLVMValueRef max_alpha = 2461b8e80941Smrg bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0); 2462b8e80941Smrg 2463b8e80941Smrg /* Clamp. */ 2464b8e80941Smrg if (bits != 16) { 2465b8e80941Smrg for (int i = 0; i < 2; i++) { 2466b8e80941Smrg bool alpha = hi && i == 1; 2467b8e80941Smrg args[i] = ac_build_umin(ctx, args[i], 2468b8e80941Smrg alpha ? max_alpha : max_rgb); 2469b8e80941Smrg } 2470b8e80941Smrg } 2471b8e80941Smrg 2472b8e80941Smrg LLVMValueRef res = 2473b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", 2474b8e80941Smrg ctx->v2i16, args, 2, 2475b8e80941Smrg AC_FUNC_ATTR_READNONE); 2476b8e80941Smrg return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); 2477b8e80941Smrg} 2478b8e80941Smrg 2479b8e80941SmrgLLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1) 2480b8e80941Smrg{ 2481b8e80941Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, 2482b8e80941Smrg &i1, 1, AC_FUNC_ATTR_READNONE); 2483b8e80941Smrg} 2484b8e80941Smrg 2485b8e80941Smrgvoid ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1) 2486b8e80941Smrg{ 2487b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, 2488b8e80941Smrg &i1, 1, 0); 2489b8e80941Smrg} 2490b8e80941Smrg 2491b8e80941SmrgLLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, 2492b8e80941Smrg LLVMValueRef offset, LLVMValueRef width, 2493b8e80941Smrg bool is_signed) 2494b8e80941Smrg{ 2495b8e80941Smrg LLVMValueRef args[] = { 2496b8e80941Smrg input, 2497b8e80941Smrg offset, 2498b8e80941Smrg width, 2499b8e80941Smrg }; 2500b8e80941Smrg 2501b8e80941Smrg return ac_build_intrinsic(ctx, 2502b8e80941Smrg is_signed ? "llvm.amdgcn.sbfe.i32" : 2503b8e80941Smrg "llvm.amdgcn.ubfe.i32", 2504b8e80941Smrg ctx->i32, args, 3, 2505b8e80941Smrg AC_FUNC_ATTR_READNONE); 2506b8e80941Smrg} 2507b8e80941Smrg 2508b8e80941SmrgLLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, 2509b8e80941Smrg LLVMValueRef s1, LLVMValueRef s2) 2510b8e80941Smrg{ 2511b8e80941Smrg return LLVMBuildAdd(ctx->builder, 2512b8e80941Smrg LLVMBuildMul(ctx->builder, s0, s1, ""), s2, ""); 2513b8e80941Smrg} 2514b8e80941Smrg 2515b8e80941SmrgLLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, 2516b8e80941Smrg LLVMValueRef s1, LLVMValueRef s2) 2517b8e80941Smrg{ 2518b8e80941Smrg return LLVMBuildFAdd(ctx->builder, 2519b8e80941Smrg LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, ""); 2520b8e80941Smrg} 2521b8e80941Smrg 2522b8e80941Smrgvoid ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16) 2523b8e80941Smrg{ 2524b8e80941Smrg LLVMValueRef args[1] = { 2525b8e80941Smrg LLVMConstInt(ctx->i32, simm16, false), 2526b8e80941Smrg }; 2527b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", 2528b8e80941Smrg ctx->voidt, args, 1, 0); 2529b8e80941Smrg} 2530b8e80941Smrg 2531b8e80941SmrgLLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0, 2532b8e80941Smrg LLVMValueRef src1, LLVMValueRef src2, 2533b8e80941Smrg unsigned bitsize) 2534b8e80941Smrg{ 2535b8e80941Smrg LLVMTypeRef type; 2536b8e80941Smrg char *intr; 2537b8e80941Smrg 2538b8e80941Smrg if (bitsize == 16) { 2539b8e80941Smrg intr = "llvm.amdgcn.fmed3.f16"; 2540b8e80941Smrg type = ctx->f16; 2541b8e80941Smrg } else if (bitsize == 32) { 2542b8e80941Smrg intr = "llvm.amdgcn.fmed3.f32"; 2543b8e80941Smrg type = ctx->f32; 2544b8e80941Smrg } else { 2545b8e80941Smrg intr = "llvm.amdgcn.fmed3.f64"; 2546b8e80941Smrg type = ctx->f64; 2547b8e80941Smrg } 2548b8e80941Smrg 2549b8e80941Smrg LLVMValueRef params[] = { 2550b8e80941Smrg src0, 2551b8e80941Smrg src1, 2552b8e80941Smrg src2, 2553b8e80941Smrg }; 2554b8e80941Smrg return ac_build_intrinsic(ctx, intr, type, params, 3, 2555b8e80941Smrg AC_FUNC_ATTR_READNONE); 2556b8e80941Smrg} 2557b8e80941Smrg 2558b8e80941SmrgLLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, 2559b8e80941Smrg unsigned bitsize) 2560b8e80941Smrg{ 2561b8e80941Smrg LLVMTypeRef type; 2562b8e80941Smrg char *intr; 2563b8e80941Smrg 2564b8e80941Smrg if (bitsize == 16) { 2565b8e80941Smrg intr = "llvm.amdgcn.fract.f16"; 2566b8e80941Smrg type = ctx->f16; 2567b8e80941Smrg } else if (bitsize == 32) { 2568b8e80941Smrg intr = "llvm.amdgcn.fract.f32"; 2569b8e80941Smrg type = ctx->f32; 2570b8e80941Smrg } else { 2571b8e80941Smrg intr = "llvm.amdgcn.fract.f64"; 2572b8e80941Smrg type = ctx->f64; 2573b8e80941Smrg } 2574b8e80941Smrg 2575b8e80941Smrg LLVMValueRef params[] = { 2576b8e80941Smrg src0, 2577b8e80941Smrg }; 2578b8e80941Smrg return ac_build_intrinsic(ctx, intr, type, params, 1, 2579b8e80941Smrg AC_FUNC_ATTR_READNONE); 2580b8e80941Smrg} 2581b8e80941Smrg 2582b8e80941SmrgLLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0, 2583b8e80941Smrg unsigned bitsize) 2584b8e80941Smrg{ 2585b8e80941Smrg LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, bitsize); 2586b8e80941Smrg LLVMValueRef zero = LLVMConstInt(type, 0, false); 2587b8e80941Smrg LLVMValueRef one = LLVMConstInt(type, 1, false); 2588b8e80941Smrg 2589b8e80941Smrg LLVMValueRef cmp, val; 2590b8e80941Smrg cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, ""); 2591b8e80941Smrg val = LLVMBuildSelect(ctx->builder, cmp, one, src0, ""); 2592b8e80941Smrg cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, ""); 2593b8e80941Smrg val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(type, -1, true), ""); 2594b8e80941Smrg return val; 2595b8e80941Smrg} 2596b8e80941Smrg 2597b8e80941SmrgLLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0, 2598b8e80941Smrg unsigned bitsize) 2599b8e80941Smrg{ 2600b8e80941Smrg LLVMValueRef cmp, val, zero, one; 2601b8e80941Smrg LLVMTypeRef type; 2602b8e80941Smrg 2603b8e80941Smrg if (bitsize == 16) { 2604b8e80941Smrg type = ctx->f16; 2605b8e80941Smrg zero = ctx->f16_0; 2606b8e80941Smrg one = ctx->f16_1; 2607b8e80941Smrg } else if (bitsize == 32) { 2608b8e80941Smrg type = ctx->f32; 2609b8e80941Smrg zero = ctx->f32_0; 2610b8e80941Smrg one = ctx->f32_1; 2611b8e80941Smrg } else { 2612b8e80941Smrg type = ctx->f64; 2613b8e80941Smrg zero = ctx->f64_0; 2614b8e80941Smrg one = ctx->f64_1; 2615b8e80941Smrg } 2616b8e80941Smrg 2617b8e80941Smrg cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, ""); 2618b8e80941Smrg val = LLVMBuildSelect(ctx->builder, cmp, one, src0, ""); 2619b8e80941Smrg cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, ""); 2620b8e80941Smrg val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), ""); 2621b8e80941Smrg return val; 2622b8e80941Smrg} 2623b8e80941Smrg 2624b8e80941SmrgLLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0) 2625b8e80941Smrg{ 2626b8e80941Smrg LLVMValueRef result; 2627b8e80941Smrg unsigned bitsize; 2628b8e80941Smrg 2629b8e80941Smrg bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); 2630b8e80941Smrg 2631b8e80941Smrg switch (bitsize) { 2632b8e80941Smrg case 64: 2633b8e80941Smrg result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64, 2634b8e80941Smrg (LLVMValueRef []) { src0 }, 1, 2635b8e80941Smrg AC_FUNC_ATTR_READNONE); 2636b8e80941Smrg 2637b8e80941Smrg result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); 2638b8e80941Smrg break; 2639b8e80941Smrg case 32: 2640b8e80941Smrg result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, 2641b8e80941Smrg (LLVMValueRef []) { src0 }, 1, 2642b8e80941Smrg AC_FUNC_ATTR_READNONE); 2643b8e80941Smrg break; 2644b8e80941Smrg case 16: 2645b8e80941Smrg result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16, 2646b8e80941Smrg (LLVMValueRef []) { src0 }, 1, 2647b8e80941Smrg AC_FUNC_ATTR_READNONE); 2648b8e80941Smrg 2649b8e80941Smrg result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); 2650b8e80941Smrg break; 2651b8e80941Smrg case 8: 2652b8e80941Smrg result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8, 2653b8e80941Smrg (LLVMValueRef []) { src0 }, 1, 2654b8e80941Smrg AC_FUNC_ATTR_READNONE); 2655b8e80941Smrg 2656b8e80941Smrg result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); 2657b8e80941Smrg break; 2658b8e80941Smrg default: 2659b8e80941Smrg unreachable(!"invalid bitsize"); 2660b8e80941Smrg break; 2661b8e80941Smrg } 2662b8e80941Smrg 2663b8e80941Smrg return result; 2664b8e80941Smrg} 2665b8e80941Smrg 2666b8e80941SmrgLLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, 2667b8e80941Smrg LLVMValueRef src0) 2668b8e80941Smrg{ 2669b8e80941Smrg LLVMValueRef result; 2670b8e80941Smrg unsigned bitsize; 2671b8e80941Smrg 2672b8e80941Smrg bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); 2673b8e80941Smrg 2674b8e80941Smrg switch (bitsize) { 2675b8e80941Smrg case 64: 2676b8e80941Smrg result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64, 2677b8e80941Smrg (LLVMValueRef []) { src0 }, 1, 2678b8e80941Smrg AC_FUNC_ATTR_READNONE); 2679b8e80941Smrg 2680b8e80941Smrg result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); 2681b8e80941Smrg break; 2682b8e80941Smrg case 32: 2683b8e80941Smrg result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, 2684b8e80941Smrg (LLVMValueRef []) { src0 }, 1, 2685b8e80941Smrg AC_FUNC_ATTR_READNONE); 2686b8e80941Smrg break; 2687b8e80941Smrg case 16: 2688b8e80941Smrg result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16, 2689b8e80941Smrg (LLVMValueRef []) { src0 }, 1, 2690b8e80941Smrg AC_FUNC_ATTR_READNONE); 2691b8e80941Smrg 2692b8e80941Smrg result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); 2693b8e80941Smrg break; 2694b8e80941Smrg case 8: 2695b8e80941Smrg result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8, 2696b8e80941Smrg (LLVMValueRef []) { src0 }, 1, 2697b8e80941Smrg AC_FUNC_ATTR_READNONE); 2698b8e80941Smrg 2699b8e80941Smrg result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); 2700b8e80941Smrg break; 2701b8e80941Smrg default: 2702b8e80941Smrg unreachable(!"invalid bitsize"); 2703b8e80941Smrg break; 2704b8e80941Smrg } 2705b8e80941Smrg 2706b8e80941Smrg return result; 2707b8e80941Smrg} 2708b8e80941Smrg 2709b8e80941Smrg#define AC_EXP_TARGET 0 2710b8e80941Smrg#define AC_EXP_ENABLED_CHANNELS 1 2711b8e80941Smrg#define AC_EXP_OUT0 2 2712b8e80941Smrg 2713b8e80941Smrgenum ac_ir_type { 2714b8e80941Smrg AC_IR_UNDEF, 2715b8e80941Smrg AC_IR_CONST, 2716b8e80941Smrg AC_IR_VALUE, 2717b8e80941Smrg}; 2718b8e80941Smrg 2719b8e80941Smrgstruct ac_vs_exp_chan 2720b8e80941Smrg{ 2721b8e80941Smrg LLVMValueRef value; 2722b8e80941Smrg float const_float; 2723b8e80941Smrg enum ac_ir_type type; 2724b8e80941Smrg}; 2725b8e80941Smrg 2726b8e80941Smrgstruct ac_vs_exp_inst { 2727b8e80941Smrg unsigned offset; 2728b8e80941Smrg LLVMValueRef inst; 2729b8e80941Smrg struct ac_vs_exp_chan chan[4]; 2730b8e80941Smrg}; 2731b8e80941Smrg 2732b8e80941Smrgstruct ac_vs_exports { 2733b8e80941Smrg unsigned num; 2734b8e80941Smrg struct ac_vs_exp_inst exp[VARYING_SLOT_MAX]; 2735b8e80941Smrg}; 2736b8e80941Smrg 2737b8e80941Smrg/* Return true if the PARAM export has been eliminated. */ 2738b8e80941Smrgstatic bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, 2739b8e80941Smrg uint32_t num_outputs, 2740b8e80941Smrg struct ac_vs_exp_inst *exp) 2741b8e80941Smrg{ 2742b8e80941Smrg unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ 2743b8e80941Smrg bool is_zero[4] = {}, is_one[4] = {}; 2744b8e80941Smrg 2745b8e80941Smrg for (i = 0; i < 4; i++) { 2746b8e80941Smrg /* It's a constant expression. Undef outputs are eliminated too. */ 2747b8e80941Smrg if (exp->chan[i].type == AC_IR_UNDEF) { 2748b8e80941Smrg is_zero[i] = true; 2749b8e80941Smrg is_one[i] = true; 2750b8e80941Smrg } else if (exp->chan[i].type == AC_IR_CONST) { 2751b8e80941Smrg if (exp->chan[i].const_float == 0) 2752b8e80941Smrg is_zero[i] = true; 2753b8e80941Smrg else if (exp->chan[i].const_float == 1) 2754b8e80941Smrg is_one[i] = true; 2755b8e80941Smrg else 2756b8e80941Smrg return false; /* other constant */ 2757b8e80941Smrg } else 2758b8e80941Smrg return false; 2759b8e80941Smrg } 2760b8e80941Smrg 2761b8e80941Smrg /* Only certain combinations of 0 and 1 can be eliminated. */ 2762b8e80941Smrg if (is_zero[0] && is_zero[1] && is_zero[2]) 2763b8e80941Smrg default_val = is_zero[3] ? 0 : 1; 2764b8e80941Smrg else if (is_one[0] && is_one[1] && is_one[2]) 2765b8e80941Smrg default_val = is_zero[3] ? 2 : 3; 2766b8e80941Smrg else 2767b8e80941Smrg return false; 2768b8e80941Smrg 2769b8e80941Smrg /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ 2770b8e80941Smrg LLVMInstructionEraseFromParent(exp->inst); 2771b8e80941Smrg 2772b8e80941Smrg /* Change OFFSET to DEFAULT_VAL. */ 2773b8e80941Smrg for (i = 0; i < num_outputs; i++) { 2774b8e80941Smrg if (vs_output_param_offset[i] == exp->offset) { 2775b8e80941Smrg vs_output_param_offset[i] = 2776b8e80941Smrg AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val; 2777b8e80941Smrg break; 2778b8e80941Smrg } 2779b8e80941Smrg } 2780b8e80941Smrg return true; 2781b8e80941Smrg} 2782b8e80941Smrg 2783b8e80941Smrgstatic bool ac_eliminate_duplicated_output(struct ac_llvm_context *ctx, 2784b8e80941Smrg uint8_t *vs_output_param_offset, 2785b8e80941Smrg uint32_t num_outputs, 2786b8e80941Smrg struct ac_vs_exports *processed, 2787b8e80941Smrg struct ac_vs_exp_inst *exp) 2788b8e80941Smrg{ 2789b8e80941Smrg unsigned p, copy_back_channels = 0; 2790b8e80941Smrg 2791b8e80941Smrg /* See if the output is already in the list of processed outputs. 2792b8e80941Smrg * The LLVMValueRef comparison relies on SSA. 2793b8e80941Smrg */ 2794b8e80941Smrg for (p = 0; p < processed->num; p++) { 2795b8e80941Smrg bool different = false; 2796b8e80941Smrg 2797b8e80941Smrg for (unsigned j = 0; j < 4; j++) { 2798b8e80941Smrg struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j]; 2799b8e80941Smrg struct ac_vs_exp_chan *c2 = &exp->chan[j]; 2800b8e80941Smrg 2801b8e80941Smrg /* Treat undef as a match. */ 2802b8e80941Smrg if (c2->type == AC_IR_UNDEF) 2803b8e80941Smrg continue; 2804b8e80941Smrg 2805b8e80941Smrg /* If c1 is undef but c2 isn't, we can copy c2 to c1 2806b8e80941Smrg * and consider the instruction duplicated. 2807b8e80941Smrg */ 2808b8e80941Smrg if (c1->type == AC_IR_UNDEF) { 2809b8e80941Smrg copy_back_channels |= 1 << j; 2810b8e80941Smrg continue; 2811b8e80941Smrg } 2812b8e80941Smrg 2813b8e80941Smrg /* Test whether the channels are not equal. */ 2814b8e80941Smrg if (c1->type != c2->type || 2815b8e80941Smrg (c1->type == AC_IR_CONST && 2816b8e80941Smrg c1->const_float != c2->const_float) || 2817b8e80941Smrg (c1->type == AC_IR_VALUE && 2818b8e80941Smrg c1->value != c2->value)) { 2819b8e80941Smrg different = true; 2820b8e80941Smrg break; 2821b8e80941Smrg } 2822b8e80941Smrg } 2823b8e80941Smrg if (!different) 2824b8e80941Smrg break; 2825b8e80941Smrg 2826b8e80941Smrg copy_back_channels = 0; 2827b8e80941Smrg } 2828b8e80941Smrg if (p == processed->num) 2829b8e80941Smrg return false; 2830b8e80941Smrg 2831b8e80941Smrg /* If a match was found, but the matching export has undef where the new 2832b8e80941Smrg * one has a normal value, copy the normal value to the undef channel. 2833b8e80941Smrg */ 2834b8e80941Smrg struct ac_vs_exp_inst *match = &processed->exp[p]; 2835b8e80941Smrg 2836b8e80941Smrg /* Get current enabled channels mask. */ 2837b8e80941Smrg LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS); 2838b8e80941Smrg unsigned enabled_channels = LLVMConstIntGetZExtValue(arg); 2839b8e80941Smrg 2840b8e80941Smrg while (copy_back_channels) { 2841b8e80941Smrg unsigned chan = u_bit_scan(©_back_channels); 2842b8e80941Smrg 2843b8e80941Smrg assert(match->chan[chan].type == AC_IR_UNDEF); 2844b8e80941Smrg LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan, 2845b8e80941Smrg exp->chan[chan].value); 2846b8e80941Smrg match->chan[chan] = exp->chan[chan]; 2847b8e80941Smrg 2848b8e80941Smrg /* Update number of enabled channels because the original mask 2849b8e80941Smrg * is not always 0xf. 2850b8e80941Smrg */ 2851b8e80941Smrg enabled_channels |= (1 << chan); 2852b8e80941Smrg LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS, 2853b8e80941Smrg LLVMConstInt(ctx->i32, enabled_channels, 0)); 2854b8e80941Smrg } 2855b8e80941Smrg 2856b8e80941Smrg /* The PARAM export is duplicated. Kill it. */ 2857b8e80941Smrg LLVMInstructionEraseFromParent(exp->inst); 2858b8e80941Smrg 2859b8e80941Smrg /* Change OFFSET to the matching export. */ 2860b8e80941Smrg for (unsigned i = 0; i < num_outputs; i++) { 2861b8e80941Smrg if (vs_output_param_offset[i] == exp->offset) { 2862b8e80941Smrg vs_output_param_offset[i] = match->offset; 2863b8e80941Smrg break; 2864b8e80941Smrg } 2865b8e80941Smrg } 2866b8e80941Smrg return true; 2867b8e80941Smrg} 2868b8e80941Smrg 2869b8e80941Smrgvoid ac_optimize_vs_outputs(struct ac_llvm_context *ctx, 2870b8e80941Smrg LLVMValueRef main_fn, 2871b8e80941Smrg uint8_t *vs_output_param_offset, 2872b8e80941Smrg uint32_t num_outputs, 2873b8e80941Smrg uint8_t *num_param_exports) 2874b8e80941Smrg{ 2875b8e80941Smrg LLVMBasicBlockRef bb; 2876b8e80941Smrg bool removed_any = false; 2877b8e80941Smrg struct ac_vs_exports exports; 2878b8e80941Smrg 2879b8e80941Smrg exports.num = 0; 2880b8e80941Smrg 2881b8e80941Smrg /* Process all LLVM instructions. */ 2882b8e80941Smrg bb = LLVMGetFirstBasicBlock(main_fn); 2883b8e80941Smrg while (bb) { 2884b8e80941Smrg LLVMValueRef inst = LLVMGetFirstInstruction(bb); 2885b8e80941Smrg 2886b8e80941Smrg while (inst) { 2887b8e80941Smrg LLVMValueRef cur = inst; 2888b8e80941Smrg inst = LLVMGetNextInstruction(inst); 2889b8e80941Smrg struct ac_vs_exp_inst exp; 2890b8e80941Smrg 2891b8e80941Smrg if (LLVMGetInstructionOpcode(cur) != LLVMCall) 2892b8e80941Smrg continue; 2893b8e80941Smrg 2894b8e80941Smrg LLVMValueRef callee = ac_llvm_get_called_value(cur); 2895b8e80941Smrg 2896b8e80941Smrg if (!ac_llvm_is_function(callee)) 2897b8e80941Smrg continue; 2898b8e80941Smrg 2899b8e80941Smrg const char *name = LLVMGetValueName(callee); 2900b8e80941Smrg unsigned num_args = LLVMCountParams(callee); 2901b8e80941Smrg 2902b8e80941Smrg /* Check if this is an export instruction. */ 2903b8e80941Smrg if ((num_args != 9 && num_args != 8) || 2904b8e80941Smrg (strcmp(name, "llvm.SI.export") && 2905b8e80941Smrg strcmp(name, "llvm.amdgcn.exp.f32"))) 2906b8e80941Smrg continue; 2907b8e80941Smrg 2908b8e80941Smrg LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET); 2909b8e80941Smrg unsigned target = LLVMConstIntGetZExtValue(arg); 2910b8e80941Smrg 2911b8e80941Smrg if (target < V_008DFC_SQ_EXP_PARAM) 2912b8e80941Smrg continue; 2913b8e80941Smrg 2914b8e80941Smrg target -= V_008DFC_SQ_EXP_PARAM; 2915b8e80941Smrg 2916b8e80941Smrg /* Parse the instruction. */ 2917b8e80941Smrg memset(&exp, 0, sizeof(exp)); 2918b8e80941Smrg exp.offset = target; 2919b8e80941Smrg exp.inst = cur; 2920b8e80941Smrg 2921b8e80941Smrg for (unsigned i = 0; i < 4; i++) { 2922b8e80941Smrg LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i); 2923b8e80941Smrg 2924b8e80941Smrg exp.chan[i].value = v; 2925b8e80941Smrg 2926b8e80941Smrg if (LLVMIsUndef(v)) { 2927b8e80941Smrg exp.chan[i].type = AC_IR_UNDEF; 2928b8e80941Smrg } else if (LLVMIsAConstantFP(v)) { 2929b8e80941Smrg LLVMBool loses_info; 2930b8e80941Smrg exp.chan[i].type = AC_IR_CONST; 2931b8e80941Smrg exp.chan[i].const_float = 2932b8e80941Smrg LLVMConstRealGetDouble(v, &loses_info); 2933b8e80941Smrg } else { 2934b8e80941Smrg exp.chan[i].type = AC_IR_VALUE; 2935b8e80941Smrg } 2936b8e80941Smrg } 2937b8e80941Smrg 2938b8e80941Smrg /* Eliminate constant and duplicated PARAM exports. */ 2939b8e80941Smrg if (ac_eliminate_const_output(vs_output_param_offset, 2940b8e80941Smrg num_outputs, &exp) || 2941b8e80941Smrg ac_eliminate_duplicated_output(ctx, 2942b8e80941Smrg vs_output_param_offset, 2943b8e80941Smrg num_outputs, &exports, 2944b8e80941Smrg &exp)) { 2945b8e80941Smrg removed_any = true; 2946b8e80941Smrg } else { 2947b8e80941Smrg exports.exp[exports.num++] = exp; 2948b8e80941Smrg } 2949b8e80941Smrg } 2950b8e80941Smrg bb = LLVMGetNextBasicBlock(bb); 2951b8e80941Smrg } 2952b8e80941Smrg 2953b8e80941Smrg /* Remove holes in export memory due to removed PARAM exports. 2954b8e80941Smrg * This is done by renumbering all PARAM exports. 2955b8e80941Smrg */ 2956b8e80941Smrg if (removed_any) { 2957b8e80941Smrg uint8_t old_offset[VARYING_SLOT_MAX]; 2958b8e80941Smrg unsigned out, i; 2959b8e80941Smrg 2960b8e80941Smrg /* Make a copy of the offsets. We need the old version while 2961b8e80941Smrg * we are modifying some of them. */ 2962b8e80941Smrg memcpy(old_offset, vs_output_param_offset, 2963b8e80941Smrg sizeof(old_offset)); 2964b8e80941Smrg 2965b8e80941Smrg for (i = 0; i < exports.num; i++) { 2966b8e80941Smrg unsigned offset = exports.exp[i].offset; 2967b8e80941Smrg 2968b8e80941Smrg /* Update vs_output_param_offset. Multiple outputs can 2969b8e80941Smrg * have the same offset. 2970b8e80941Smrg */ 2971b8e80941Smrg for (out = 0; out < num_outputs; out++) { 2972b8e80941Smrg if (old_offset[out] == offset) 2973b8e80941Smrg vs_output_param_offset[out] = i; 2974b8e80941Smrg } 2975b8e80941Smrg 2976b8e80941Smrg /* Change the PARAM offset in the instruction. */ 2977b8e80941Smrg LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET, 2978b8e80941Smrg LLVMConstInt(ctx->i32, 2979b8e80941Smrg V_008DFC_SQ_EXP_PARAM + i, 0)); 2980b8e80941Smrg } 2981b8e80941Smrg *num_param_exports = exports.num; 2982b8e80941Smrg } 2983b8e80941Smrg} 2984b8e80941Smrg 2985b8e80941Smrgvoid ac_init_exec_full_mask(struct ac_llvm_context *ctx) 2986b8e80941Smrg{ 2987b8e80941Smrg LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0); 2988b8e80941Smrg ac_build_intrinsic(ctx, 2989b8e80941Smrg "llvm.amdgcn.init.exec", ctx->voidt, 2990b8e80941Smrg &full_mask, 1, AC_FUNC_ATTR_CONVERGENT); 2991b8e80941Smrg} 2992b8e80941Smrg 2993b8e80941Smrgvoid ac_declare_lds_as_pointer(struct ac_llvm_context *ctx) 2994b8e80941Smrg{ 2995b8e80941Smrg unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768; 2996b8e80941Smrg ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0, 2997b8e80941Smrg LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS), 2998b8e80941Smrg "lds"); 2999b8e80941Smrg} 3000b8e80941Smrg 3001b8e80941SmrgLLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, 3002b8e80941Smrg LLVMValueRef dw_addr) 3003b8e80941Smrg{ 3004b8e80941Smrg return ac_build_load(ctx, ctx->lds, dw_addr); 3005b8e80941Smrg} 3006b8e80941Smrg 3007b8e80941Smrgvoid ac_lds_store(struct ac_llvm_context *ctx, 3008b8e80941Smrg LLVMValueRef dw_addr, 3009b8e80941Smrg LLVMValueRef value) 3010b8e80941Smrg{ 3011b8e80941Smrg value = ac_to_integer(ctx, value); 3012b8e80941Smrg ac_build_indexed_store(ctx, ctx->lds, 3013b8e80941Smrg dw_addr, value); 3014b8e80941Smrg} 3015b8e80941Smrg 3016b8e80941SmrgLLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, 3017b8e80941Smrg LLVMTypeRef dst_type, 3018b8e80941Smrg LLVMValueRef src0) 3019b8e80941Smrg{ 3020b8e80941Smrg unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0)); 3021b8e80941Smrg const char *intrin_name; 3022b8e80941Smrg LLVMTypeRef type; 3023b8e80941Smrg LLVMValueRef zero; 3024b8e80941Smrg 3025b8e80941Smrg switch (src0_bitsize) { 3026b8e80941Smrg case 64: 3027b8e80941Smrg intrin_name = "llvm.cttz.i64"; 3028b8e80941Smrg type = ctx->i64; 3029b8e80941Smrg zero = ctx->i64_0; 3030b8e80941Smrg break; 3031b8e80941Smrg case 32: 3032b8e80941Smrg intrin_name = "llvm.cttz.i32"; 3033b8e80941Smrg type = ctx->i32; 3034b8e80941Smrg zero = ctx->i32_0; 3035b8e80941Smrg break; 3036b8e80941Smrg case 16: 3037b8e80941Smrg intrin_name = "llvm.cttz.i16"; 3038b8e80941Smrg type = ctx->i16; 3039b8e80941Smrg zero = ctx->i16_0; 3040b8e80941Smrg break; 3041b8e80941Smrg case 8: 3042b8e80941Smrg intrin_name = "llvm.cttz.i8"; 3043b8e80941Smrg type = ctx->i8; 3044b8e80941Smrg zero = ctx->i8_0; 3045b8e80941Smrg break; 3046b8e80941Smrg default: 3047b8e80941Smrg unreachable(!"invalid bitsize"); 3048b8e80941Smrg } 3049b8e80941Smrg 3050b8e80941Smrg LLVMValueRef params[2] = { 3051b8e80941Smrg src0, 3052b8e80941Smrg 3053b8e80941Smrg /* The value of 1 means that ffs(x=0) = undef, so LLVM won't 3054b8e80941Smrg * add special code to check for x=0. The reason is that 3055b8e80941Smrg * the LLVM behavior for x=0 is different from what we 3056b8e80941Smrg * need here. However, LLVM also assumes that ffs(x) is 3057b8e80941Smrg * in [0, 31], but GLSL expects that ffs(0) = -1, so 3058b8e80941Smrg * a conditional assignment to handle 0 is still required. 3059b8e80941Smrg * 3060b8e80941Smrg * The hardware already implements the correct behavior. 3061b8e80941Smrg */ 3062b8e80941Smrg ctx->i1true, 3063b8e80941Smrg }; 3064b8e80941Smrg 3065b8e80941Smrg LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type, 3066b8e80941Smrg params, 2, 3067b8e80941Smrg AC_FUNC_ATTR_READNONE); 3068b8e80941Smrg 3069b8e80941Smrg if (src0_bitsize == 64) { 3070b8e80941Smrg lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, ""); 3071b8e80941Smrg } else if (src0_bitsize < 32) { 3072b8e80941Smrg lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, ""); 3073b8e80941Smrg } 3074b8e80941Smrg 3075b8e80941Smrg /* TODO: We need an intrinsic to skip this conditional. */ 3076b8e80941Smrg /* Check for zero: */ 3077b8e80941Smrg return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, 3078b8e80941Smrg LLVMIntEQ, src0, 3079b8e80941Smrg zero, ""), 3080b8e80941Smrg LLVMConstInt(ctx->i32, -1, 0), lsb, ""); 3081b8e80941Smrg} 3082b8e80941Smrg 3083b8e80941SmrgLLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type) 3084b8e80941Smrg{ 3085b8e80941Smrg return LLVMPointerType(LLVMArrayType(elem_type, 0), 3086b8e80941Smrg AC_ADDR_SPACE_CONST); 3087b8e80941Smrg} 3088b8e80941Smrg 3089b8e80941SmrgLLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) 3090b8e80941Smrg{ 3091b8e80941Smrg return LLVMPointerType(LLVMArrayType(elem_type, 0), 3092b8e80941Smrg AC_ADDR_SPACE_CONST_32BIT); 3093b8e80941Smrg} 3094b8e80941Smrg 3095b8e80941Smrgstatic struct ac_llvm_flow * 3096b8e80941Smrgget_current_flow(struct ac_llvm_context *ctx) 3097b8e80941Smrg{ 3098b8e80941Smrg if (ctx->flow_depth > 0) 3099b8e80941Smrg return &ctx->flow[ctx->flow_depth - 1]; 3100b8e80941Smrg return NULL; 3101b8e80941Smrg} 3102b8e80941Smrg 3103b8e80941Smrgstatic struct ac_llvm_flow * 3104b8e80941Smrgget_innermost_loop(struct ac_llvm_context *ctx) 3105b8e80941Smrg{ 3106b8e80941Smrg for (unsigned i = ctx->flow_depth; i > 0; --i) { 3107b8e80941Smrg if (ctx->flow[i - 1].loop_entry_block) 3108b8e80941Smrg return &ctx->flow[i - 1]; 3109b8e80941Smrg } 3110b8e80941Smrg return NULL; 3111b8e80941Smrg} 3112b8e80941Smrg 3113b8e80941Smrgstatic struct ac_llvm_flow * 3114b8e80941Smrgpush_flow(struct ac_llvm_context *ctx) 3115b8e80941Smrg{ 3116b8e80941Smrg struct ac_llvm_flow *flow; 3117b8e80941Smrg 3118b8e80941Smrg if (ctx->flow_depth >= ctx->flow_depth_max) { 3119b8e80941Smrg unsigned new_max = MAX2(ctx->flow_depth << 1, 3120b8e80941Smrg AC_LLVM_INITIAL_CF_DEPTH); 3121b8e80941Smrg 3122b8e80941Smrg ctx->flow = realloc(ctx->flow, new_max * sizeof(*ctx->flow)); 3123b8e80941Smrg ctx->flow_depth_max = new_max; 3124b8e80941Smrg } 3125b8e80941Smrg 3126b8e80941Smrg flow = &ctx->flow[ctx->flow_depth]; 3127b8e80941Smrg ctx->flow_depth++; 3128b8e80941Smrg 3129b8e80941Smrg flow->next_block = NULL; 3130b8e80941Smrg flow->loop_entry_block = NULL; 3131b8e80941Smrg return flow; 3132b8e80941Smrg} 3133b8e80941Smrg 3134b8e80941Smrgstatic void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, 3135b8e80941Smrg int label_id) 3136b8e80941Smrg{ 3137b8e80941Smrg char buf[32]; 3138b8e80941Smrg snprintf(buf, sizeof(buf), "%s%d", base, label_id); 3139b8e80941Smrg LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf); 3140b8e80941Smrg} 3141b8e80941Smrg 3142b8e80941Smrg/* Append a basic block at the level of the parent flow. 3143b8e80941Smrg */ 3144b8e80941Smrgstatic LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx, 3145b8e80941Smrg const char *name) 3146b8e80941Smrg{ 3147b8e80941Smrg assert(ctx->flow_depth >= 1); 3148b8e80941Smrg 3149b8e80941Smrg if (ctx->flow_depth >= 2) { 3150b8e80941Smrg struct ac_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2]; 3151b8e80941Smrg 3152b8e80941Smrg return LLVMInsertBasicBlockInContext(ctx->context, 3153b8e80941Smrg flow->next_block, name); 3154b8e80941Smrg } 3155b8e80941Smrg 3156b8e80941Smrg LLVMValueRef main_fn = 3157b8e80941Smrg LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)); 3158b8e80941Smrg return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name); 3159b8e80941Smrg} 3160b8e80941Smrg 3161b8e80941Smrg/* Emit a branch to the given default target for the current block if 3162b8e80941Smrg * applicable -- that is, if the current block does not already contain a 3163b8e80941Smrg * branch from a break or continue. 3164b8e80941Smrg */ 3165b8e80941Smrgstatic void emit_default_branch(LLVMBuilderRef builder, 3166b8e80941Smrg LLVMBasicBlockRef target) 3167b8e80941Smrg{ 3168b8e80941Smrg if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder))) 3169b8e80941Smrg LLVMBuildBr(builder, target); 3170b8e80941Smrg} 3171b8e80941Smrg 3172b8e80941Smrgvoid ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id) 3173b8e80941Smrg{ 3174b8e80941Smrg struct ac_llvm_flow *flow = push_flow(ctx); 3175b8e80941Smrg flow->loop_entry_block = append_basic_block(ctx, "LOOP"); 3176b8e80941Smrg flow->next_block = append_basic_block(ctx, "ENDLOOP"); 3177b8e80941Smrg set_basicblock_name(flow->loop_entry_block, "loop", label_id); 3178b8e80941Smrg LLVMBuildBr(ctx->builder, flow->loop_entry_block); 3179b8e80941Smrg LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block); 3180b8e80941Smrg} 3181b8e80941Smrg 3182b8e80941Smrgvoid ac_build_break(struct ac_llvm_context *ctx) 3183b8e80941Smrg{ 3184b8e80941Smrg struct ac_llvm_flow *flow = get_innermost_loop(ctx); 3185b8e80941Smrg LLVMBuildBr(ctx->builder, flow->next_block); 3186b8e80941Smrg} 3187b8e80941Smrg 3188b8e80941Smrgvoid ac_build_continue(struct ac_llvm_context *ctx) 3189b8e80941Smrg{ 3190b8e80941Smrg struct ac_llvm_flow *flow = get_innermost_loop(ctx); 3191b8e80941Smrg LLVMBuildBr(ctx->builder, flow->loop_entry_block); 3192b8e80941Smrg} 3193b8e80941Smrg 3194b8e80941Smrgvoid ac_build_else(struct ac_llvm_context *ctx, int label_id) 3195b8e80941Smrg{ 3196b8e80941Smrg struct ac_llvm_flow *current_branch = get_current_flow(ctx); 3197b8e80941Smrg LLVMBasicBlockRef endif_block; 3198b8e80941Smrg 3199b8e80941Smrg assert(!current_branch->loop_entry_block); 3200b8e80941Smrg 3201b8e80941Smrg endif_block = append_basic_block(ctx, "ENDIF"); 3202b8e80941Smrg emit_default_branch(ctx->builder, endif_block); 3203b8e80941Smrg 3204b8e80941Smrg LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); 3205b8e80941Smrg set_basicblock_name(current_branch->next_block, "else", label_id); 3206b8e80941Smrg 3207b8e80941Smrg current_branch->next_block = endif_block; 3208b8e80941Smrg} 3209b8e80941Smrg 3210b8e80941Smrgvoid ac_build_endif(struct ac_llvm_context *ctx, int label_id) 3211b8e80941Smrg{ 3212b8e80941Smrg struct ac_llvm_flow *current_branch = get_current_flow(ctx); 3213b8e80941Smrg 3214b8e80941Smrg assert(!current_branch->loop_entry_block); 3215b8e80941Smrg 3216b8e80941Smrg emit_default_branch(ctx->builder, current_branch->next_block); 3217b8e80941Smrg LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); 3218b8e80941Smrg set_basicblock_name(current_branch->next_block, "endif", label_id); 3219b8e80941Smrg 3220b8e80941Smrg ctx->flow_depth--; 3221b8e80941Smrg} 3222b8e80941Smrg 3223b8e80941Smrgvoid ac_build_endloop(struct ac_llvm_context *ctx, int label_id) 3224b8e80941Smrg{ 3225b8e80941Smrg struct ac_llvm_flow *current_loop = get_current_flow(ctx); 3226b8e80941Smrg 3227b8e80941Smrg assert(current_loop->loop_entry_block); 3228b8e80941Smrg 3229b8e80941Smrg emit_default_branch(ctx->builder, current_loop->loop_entry_block); 3230b8e80941Smrg 3231b8e80941Smrg LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block); 3232b8e80941Smrg set_basicblock_name(current_loop->next_block, "endloop", label_id); 3233b8e80941Smrg ctx->flow_depth--; 3234b8e80941Smrg} 3235b8e80941Smrg 3236b8e80941Smrgvoid ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id) 3237b8e80941Smrg{ 3238b8e80941Smrg struct ac_llvm_flow *flow = push_flow(ctx); 3239b8e80941Smrg LLVMBasicBlockRef if_block; 3240b8e80941Smrg 3241b8e80941Smrg if_block = append_basic_block(ctx, "IF"); 3242b8e80941Smrg flow->next_block = append_basic_block(ctx, "ELSE"); 3243b8e80941Smrg set_basicblock_name(if_block, "if", label_id); 3244b8e80941Smrg LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block); 3245b8e80941Smrg LLVMPositionBuilderAtEnd(ctx->builder, if_block); 3246b8e80941Smrg} 3247b8e80941Smrg 3248b8e80941Smrgvoid ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value, 3249b8e80941Smrg int label_id) 3250b8e80941Smrg{ 3251b8e80941Smrg LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, 3252b8e80941Smrg value, ctx->f32_0, ""); 3253b8e80941Smrg ac_build_ifcc(ctx, cond, label_id); 3254b8e80941Smrg} 3255b8e80941Smrg 3256b8e80941Smrgvoid ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, 3257b8e80941Smrg int label_id) 3258b8e80941Smrg{ 3259b8e80941Smrg LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, 3260b8e80941Smrg ac_to_integer(ctx, value), 3261b8e80941Smrg ctx->i32_0, ""); 3262b8e80941Smrg ac_build_ifcc(ctx, cond, label_id); 3263b8e80941Smrg} 3264b8e80941Smrg 3265b8e80941SmrgLLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, 3266b8e80941Smrg const char *name) 3267b8e80941Smrg{ 3268b8e80941Smrg LLVMBuilderRef builder = ac->builder; 3269b8e80941Smrg LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); 3270b8e80941Smrg LLVMValueRef function = LLVMGetBasicBlockParent(current_block); 3271b8e80941Smrg LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); 3272b8e80941Smrg LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); 3273b8e80941Smrg LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context); 3274b8e80941Smrg LLVMValueRef res; 3275b8e80941Smrg 3276b8e80941Smrg if (first_instr) { 3277b8e80941Smrg LLVMPositionBuilderBefore(first_builder, first_instr); 3278b8e80941Smrg } else { 3279b8e80941Smrg LLVMPositionBuilderAtEnd(first_builder, first_block); 3280b8e80941Smrg } 3281b8e80941Smrg 3282b8e80941Smrg res = LLVMBuildAlloca(first_builder, type, name); 3283b8e80941Smrg LLVMDisposeBuilder(first_builder); 3284b8e80941Smrg return res; 3285b8e80941Smrg} 3286b8e80941Smrg 3287b8e80941SmrgLLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, 3288b8e80941Smrg LLVMTypeRef type, const char *name) 3289b8e80941Smrg{ 3290b8e80941Smrg LLVMValueRef ptr = ac_build_alloca_undef(ac, type, name); 3291b8e80941Smrg LLVMBuildStore(ac->builder, LLVMConstNull(type), ptr); 3292b8e80941Smrg return ptr; 3293b8e80941Smrg} 3294b8e80941Smrg 3295b8e80941SmrgLLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr, 3296b8e80941Smrg LLVMTypeRef type) 3297b8e80941Smrg{ 3298b8e80941Smrg int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); 3299b8e80941Smrg return LLVMBuildBitCast(ctx->builder, ptr, 3300b8e80941Smrg LLVMPointerType(type, addr_space), ""); 3301b8e80941Smrg} 3302b8e80941Smrg 3303b8e80941SmrgLLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, 3304b8e80941Smrg unsigned count) 3305b8e80941Smrg{ 3306b8e80941Smrg unsigned num_components = ac_get_llvm_num_components(value); 3307b8e80941Smrg if (count == num_components) 3308b8e80941Smrg return value; 3309b8e80941Smrg 3310b8e80941Smrg LLVMValueRef masks[MAX2(count, 2)]; 3311b8e80941Smrg masks[0] = ctx->i32_0; 3312b8e80941Smrg masks[1] = ctx->i32_1; 3313b8e80941Smrg for (unsigned i = 2; i < count; i++) 3314b8e80941Smrg masks[i] = LLVMConstInt(ctx->i32, i, false); 3315b8e80941Smrg 3316b8e80941Smrg if (count == 1) 3317b8e80941Smrg return LLVMBuildExtractElement(ctx->builder, value, masks[0], 3318b8e80941Smrg ""); 3319b8e80941Smrg 3320b8e80941Smrg LLVMValueRef swizzle = LLVMConstVector(masks, count); 3321b8e80941Smrg return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, ""); 3322b8e80941Smrg} 3323b8e80941Smrg 3324b8e80941SmrgLLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, 3325b8e80941Smrg unsigned rshift, unsigned bitwidth) 3326b8e80941Smrg{ 3327b8e80941Smrg LLVMValueRef value = param; 3328b8e80941Smrg if (rshift) 3329b8e80941Smrg value = LLVMBuildLShr(ctx->builder, value, 3330b8e80941Smrg LLVMConstInt(ctx->i32, rshift, false), ""); 3331b8e80941Smrg 3332b8e80941Smrg if (rshift + bitwidth < 32) { 3333b8e80941Smrg unsigned mask = (1 << bitwidth) - 1; 3334b8e80941Smrg value = LLVMBuildAnd(ctx->builder, value, 3335b8e80941Smrg LLVMConstInt(ctx->i32, mask, false), ""); 3336b8e80941Smrg } 3337b8e80941Smrg return value; 3338b8e80941Smrg} 3339b8e80941Smrg 3340b8e80941Smrg/* Adjust the sample index according to FMASK. 3341b8e80941Smrg * 3342b8e80941Smrg * For uncompressed MSAA surfaces, FMASK should return 0x76543210, 3343b8e80941Smrg * which is the identity mapping. Each nibble says which physical sample 3344b8e80941Smrg * should be fetched to get that sample. 3345b8e80941Smrg * 3346b8e80941Smrg * For example, 0x11111100 means there are only 2 samples stored and 3347b8e80941Smrg * the second sample covers 3/4 of the pixel. When reading samples 0 3348b8e80941Smrg * and 1, return physical sample 0 (determined by the first two 0s 3349b8e80941Smrg * in FMASK), otherwise return physical sample 1. 3350b8e80941Smrg * 3351b8e80941Smrg * The sample index should be adjusted as follows: 3352b8e80941Smrg * addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF; 3353b8e80941Smrg */ 3354b8e80941Smrgvoid ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, 3355b8e80941Smrg LLVMValueRef *addr, bool is_array_tex) 3356b8e80941Smrg{ 3357b8e80941Smrg struct ac_image_args fmask_load = {}; 3358b8e80941Smrg fmask_load.opcode = ac_image_load; 3359b8e80941Smrg fmask_load.resource = fmask; 3360b8e80941Smrg fmask_load.dmask = 0xf; 3361b8e80941Smrg fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d; 3362b8e80941Smrg fmask_load.attributes = AC_FUNC_ATTR_READNONE; 3363b8e80941Smrg 3364b8e80941Smrg fmask_load.coords[0] = addr[0]; 3365b8e80941Smrg fmask_load.coords[1] = addr[1]; 3366b8e80941Smrg if (is_array_tex) 3367b8e80941Smrg fmask_load.coords[2] = addr[2]; 3368b8e80941Smrg 3369b8e80941Smrg LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load); 3370b8e80941Smrg fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, 3371b8e80941Smrg ac->i32_0, ""); 3372b8e80941Smrg 3373b8e80941Smrg /* Apply the formula. */ 3374b8e80941Smrg unsigned sample_chan = is_array_tex ? 3 : 2; 3375b8e80941Smrg LLVMValueRef final_sample; 3376b8e80941Smrg final_sample = LLVMBuildMul(ac->builder, addr[sample_chan], 3377b8e80941Smrg LLVMConstInt(ac->i32, 4, 0), ""); 3378b8e80941Smrg final_sample = LLVMBuildLShr(ac->builder, fmask_value, final_sample, ""); 3379b8e80941Smrg /* Mask the sample index by 0x7, because 0x8 means an unknown value 3380b8e80941Smrg * with EQAA, so those will map to 0. */ 3381b8e80941Smrg final_sample = LLVMBuildAnd(ac->builder, final_sample, 3382b8e80941Smrg LLVMConstInt(ac->i32, 0x7, 0), ""); 3383b8e80941Smrg 3384b8e80941Smrg /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK 3385b8e80941Smrg * resource descriptor is 0 (invalid). 3386b8e80941Smrg */ 3387b8e80941Smrg LLVMValueRef tmp; 3388b8e80941Smrg tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, ""); 3389b8e80941Smrg tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, ""); 3390b8e80941Smrg tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, ""); 3391b8e80941Smrg 3392b8e80941Smrg /* Replace the MSAA sample index. */ 3393b8e80941Smrg addr[sample_chan] = LLVMBuildSelect(ac->builder, tmp, final_sample, 3394b8e80941Smrg addr[sample_chan], ""); 3395b8e80941Smrg} 3396b8e80941Smrg 3397b8e80941Smrgstatic LLVMValueRef 3398b8e80941Smrg_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane) 3399b8e80941Smrg{ 3400b8e80941Smrg ac_build_optimization_barrier(ctx, &src); 3401b8e80941Smrg return ac_build_intrinsic(ctx, 3402b8e80941Smrg lane == NULL ? "llvm.amdgcn.readfirstlane" : "llvm.amdgcn.readlane", 3403b8e80941Smrg LLVMTypeOf(src), (LLVMValueRef []) { 3404b8e80941Smrg src, lane }, 3405b8e80941Smrg lane == NULL ? 1 : 2, 3406b8e80941Smrg AC_FUNC_ATTR_READNONE | 3407b8e80941Smrg AC_FUNC_ATTR_CONVERGENT); 3408b8e80941Smrg} 3409b8e80941Smrg 3410b8e80941Smrg/** 3411b8e80941Smrg * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic. 3412b8e80941Smrg * @param ctx 3413b8e80941Smrg * @param src 3414b8e80941Smrg * @param lane - id of the lane or NULL for the first active lane 3415b8e80941Smrg * @return value of the lane 3416b8e80941Smrg */ 3417b8e80941SmrgLLVMValueRef 3418b8e80941Smrgac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane) 3419b8e80941Smrg{ 3420b8e80941Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 3421b8e80941Smrg src = ac_to_integer(ctx, src); 3422b8e80941Smrg unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); 3423b8e80941Smrg LLVMValueRef ret; 3424b8e80941Smrg 3425b8e80941Smrg if (bits == 32) { 3426b8e80941Smrg ret = _ac_build_readlane(ctx, src, lane); 3427b8e80941Smrg } else { 3428b8e80941Smrg assert(bits % 32 == 0); 3429b8e80941Smrg LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); 3430b8e80941Smrg LLVMValueRef src_vector = 3431b8e80941Smrg LLVMBuildBitCast(ctx->builder, src, vec_type, ""); 3432b8e80941Smrg ret = LLVMGetUndef(vec_type); 3433b8e80941Smrg for (unsigned i = 0; i < bits / 32; i++) { 3434b8e80941Smrg src = LLVMBuildExtractElement(ctx->builder, src_vector, 3435b8e80941Smrg LLVMConstInt(ctx->i32, i, 0), ""); 3436b8e80941Smrg LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, lane); 3437b8e80941Smrg ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp, 3438b8e80941Smrg LLVMConstInt(ctx->i32, i, 0), ""); 3439b8e80941Smrg } 3440b8e80941Smrg } 3441b8e80941Smrg if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind) 3442b8e80941Smrg return LLVMBuildIntToPtr(ctx->builder, ret, src_type, ""); 3443b8e80941Smrg return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); 3444b8e80941Smrg} 3445b8e80941Smrg 3446b8e80941SmrgLLVMValueRef 3447b8e80941Smrgac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane) 3448b8e80941Smrg{ 3449b8e80941Smrg /* TODO: Use the actual instruction when LLVM adds an intrinsic for it. 3450b8e80941Smrg */ 3451b8e80941Smrg LLVMValueRef pred = LLVMBuildICmp(ctx->builder, LLVMIntEQ, lane, 3452b8e80941Smrg ac_get_thread_id(ctx), ""); 3453b8e80941Smrg return LLVMBuildSelect(ctx->builder, pred, value, src, ""); 3454b8e80941Smrg} 3455b8e80941Smrg 3456b8e80941SmrgLLVMValueRef 3457b8e80941Smrgac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask) 3458b8e80941Smrg{ 3459b8e80941Smrg LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, 3460b8e80941Smrg LLVMVectorType(ctx->i32, 2), 3461b8e80941Smrg ""); 3462b8e80941Smrg LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec, 3463b8e80941Smrg ctx->i32_0, ""); 3464b8e80941Smrg LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec, 3465b8e80941Smrg ctx->i32_1, ""); 3466b8e80941Smrg LLVMValueRef val = 3467b8e80941Smrg ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32, 3468b8e80941Smrg (LLVMValueRef []) { mask_lo, ctx->i32_0 }, 3469b8e80941Smrg 2, AC_FUNC_ATTR_READNONE); 3470b8e80941Smrg val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32, 3471b8e80941Smrg (LLVMValueRef []) { mask_hi, val }, 3472b8e80941Smrg 2, AC_FUNC_ATTR_READNONE); 3473b8e80941Smrg return val; 3474b8e80941Smrg} 3475b8e80941Smrg 3476b8e80941Smrgenum dpp_ctrl { 3477b8e80941Smrg _dpp_quad_perm = 0x000, 3478b8e80941Smrg _dpp_row_sl = 0x100, 3479b8e80941Smrg _dpp_row_sr = 0x110, 3480b8e80941Smrg _dpp_row_rr = 0x120, 3481b8e80941Smrg dpp_wf_sl1 = 0x130, 3482b8e80941Smrg dpp_wf_rl1 = 0x134, 3483b8e80941Smrg dpp_wf_sr1 = 0x138, 3484b8e80941Smrg dpp_wf_rr1 = 0x13C, 3485b8e80941Smrg dpp_row_mirror = 0x140, 3486b8e80941Smrg dpp_row_half_mirror = 0x141, 3487b8e80941Smrg dpp_row_bcast15 = 0x142, 3488b8e80941Smrg dpp_row_bcast31 = 0x143 3489b8e80941Smrg}; 3490b8e80941Smrg 3491b8e80941Smrgstatic inline enum dpp_ctrl 3492b8e80941Smrgdpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3) 3493b8e80941Smrg{ 3494b8e80941Smrg assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4); 3495b8e80941Smrg return _dpp_quad_perm | lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6); 3496b8e80941Smrg} 3497b8e80941Smrg 3498b8e80941Smrgstatic inline enum dpp_ctrl 3499b8e80941Smrgdpp_row_sl(unsigned amount) 3500b8e80941Smrg{ 3501b8e80941Smrg assert(amount > 0 && amount < 16); 3502b8e80941Smrg return _dpp_row_sl | amount; 3503b8e80941Smrg} 3504b8e80941Smrg 3505b8e80941Smrgstatic inline enum dpp_ctrl 3506b8e80941Smrgdpp_row_sr(unsigned amount) 3507b8e80941Smrg{ 3508b8e80941Smrg assert(amount > 0 && amount < 16); 3509b8e80941Smrg return _dpp_row_sr | amount; 3510b8e80941Smrg} 3511b8e80941Smrg 3512b8e80941Smrgstatic LLVMValueRef 3513b8e80941Smrg_ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src, 3514b8e80941Smrg enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask, 3515b8e80941Smrg bool bound_ctrl) 3516b8e80941Smrg{ 3517b8e80941Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.update.dpp.i32", 3518b8e80941Smrg LLVMTypeOf(old), 3519b8e80941Smrg (LLVMValueRef[]) { 3520b8e80941Smrg old, src, 3521b8e80941Smrg LLVMConstInt(ctx->i32, dpp_ctrl, 0), 3522b8e80941Smrg LLVMConstInt(ctx->i32, row_mask, 0), 3523b8e80941Smrg LLVMConstInt(ctx->i32, bank_mask, 0), 3524b8e80941Smrg LLVMConstInt(ctx->i1, bound_ctrl, 0) }, 3525b8e80941Smrg 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); 3526b8e80941Smrg} 3527b8e80941Smrg 3528b8e80941Smrgstatic LLVMValueRef 3529b8e80941Smrgac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src, 3530b8e80941Smrg enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask, 3531b8e80941Smrg bool bound_ctrl) 3532b8e80941Smrg{ 3533b8e80941Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 3534b8e80941Smrg src = ac_to_integer(ctx, src); 3535b8e80941Smrg old = ac_to_integer(ctx, old); 3536b8e80941Smrg unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); 3537b8e80941Smrg LLVMValueRef ret; 3538b8e80941Smrg if (bits == 32) { 3539b8e80941Smrg ret = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, 3540b8e80941Smrg bank_mask, bound_ctrl); 3541b8e80941Smrg } else { 3542b8e80941Smrg assert(bits % 32 == 0); 3543b8e80941Smrg LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); 3544b8e80941Smrg LLVMValueRef src_vector = 3545b8e80941Smrg LLVMBuildBitCast(ctx->builder, src, vec_type, ""); 3546b8e80941Smrg LLVMValueRef old_vector = 3547b8e80941Smrg LLVMBuildBitCast(ctx->builder, old, vec_type, ""); 3548b8e80941Smrg ret = LLVMGetUndef(vec_type); 3549b8e80941Smrg for (unsigned i = 0; i < bits / 32; i++) { 3550b8e80941Smrg src = LLVMBuildExtractElement(ctx->builder, src_vector, 3551b8e80941Smrg LLVMConstInt(ctx->i32, i, 3552b8e80941Smrg 0), ""); 3553b8e80941Smrg old = LLVMBuildExtractElement(ctx->builder, old_vector, 3554b8e80941Smrg LLVMConstInt(ctx->i32, i, 3555b8e80941Smrg 0), ""); 3556b8e80941Smrg LLVMValueRef ret_comp = _ac_build_dpp(ctx, old, src, 3557b8e80941Smrg dpp_ctrl, 3558b8e80941Smrg row_mask, 3559b8e80941Smrg bank_mask, 3560b8e80941Smrg bound_ctrl); 3561b8e80941Smrg ret = LLVMBuildInsertElement(ctx->builder, ret, 3562b8e80941Smrg ret_comp, 3563b8e80941Smrg LLVMConstInt(ctx->i32, i, 3564b8e80941Smrg 0), ""); 3565b8e80941Smrg } 3566b8e80941Smrg } 3567b8e80941Smrg return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); 3568b8e80941Smrg} 3569b8e80941Smrg 3570b8e80941Smrgstatic inline unsigned 3571b8e80941Smrgds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask) 3572b8e80941Smrg{ 3573b8e80941Smrg assert(and_mask < 32 && or_mask < 32 && xor_mask < 32); 3574b8e80941Smrg return and_mask | (or_mask << 5) | (xor_mask << 10); 3575b8e80941Smrg} 3576b8e80941Smrg 3577b8e80941Smrgstatic LLVMValueRef 3578b8e80941Smrg_ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask) 3579b8e80941Smrg{ 3580b8e80941Smrg return ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle", 3581b8e80941Smrg LLVMTypeOf(src), (LLVMValueRef []) { 3582b8e80941Smrg src, LLVMConstInt(ctx->i32, mask, 0) }, 3583b8e80941Smrg 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); 3584b8e80941Smrg} 3585b8e80941Smrg 3586b8e80941SmrgLLVMValueRef 3587b8e80941Smrgac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask) 3588b8e80941Smrg{ 3589b8e80941Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 3590b8e80941Smrg src = ac_to_integer(ctx, src); 3591b8e80941Smrg unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src)); 3592b8e80941Smrg LLVMValueRef ret; 3593b8e80941Smrg if (bits == 32) { 3594b8e80941Smrg ret = _ac_build_ds_swizzle(ctx, src, mask); 3595b8e80941Smrg } else { 3596b8e80941Smrg assert(bits % 32 == 0); 3597b8e80941Smrg LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32); 3598b8e80941Smrg LLVMValueRef src_vector = 3599b8e80941Smrg LLVMBuildBitCast(ctx->builder, src, vec_type, ""); 3600b8e80941Smrg ret = LLVMGetUndef(vec_type); 3601b8e80941Smrg for (unsigned i = 0; i < bits / 32; i++) { 3602b8e80941Smrg src = LLVMBuildExtractElement(ctx->builder, src_vector, 3603b8e80941Smrg LLVMConstInt(ctx->i32, i, 3604b8e80941Smrg 0), ""); 3605b8e80941Smrg LLVMValueRef ret_comp = _ac_build_ds_swizzle(ctx, src, 3606b8e80941Smrg mask); 3607b8e80941Smrg ret = LLVMBuildInsertElement(ctx->builder, ret, 3608b8e80941Smrg ret_comp, 3609b8e80941Smrg LLVMConstInt(ctx->i32, i, 3610b8e80941Smrg 0), ""); 3611b8e80941Smrg } 3612b8e80941Smrg } 3613b8e80941Smrg return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); 3614b8e80941Smrg} 3615b8e80941Smrg 3616b8e80941Smrgstatic LLVMValueRef 3617b8e80941Smrgac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src) 3618b8e80941Smrg{ 3619b8e80941Smrg char name[32], type[8]; 3620b8e80941Smrg ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type)); 3621b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type); 3622b8e80941Smrg return ac_build_intrinsic(ctx, name, LLVMTypeOf(src), 3623b8e80941Smrg (LLVMValueRef []) { src }, 1, 3624b8e80941Smrg AC_FUNC_ATTR_READNONE); 3625b8e80941Smrg} 3626b8e80941Smrg 3627b8e80941Smrgstatic LLVMValueRef 3628b8e80941Smrgac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src, 3629b8e80941Smrg LLVMValueRef inactive) 3630b8e80941Smrg{ 3631b8e80941Smrg char name[33], type[8]; 3632b8e80941Smrg LLVMTypeRef src_type = LLVMTypeOf(src); 3633b8e80941Smrg src = ac_to_integer(ctx, src); 3634b8e80941Smrg inactive = ac_to_integer(ctx, inactive); 3635b8e80941Smrg ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type)); 3636b8e80941Smrg snprintf(name, sizeof(name), "llvm.amdgcn.set.inactive.%s", type); 3637b8e80941Smrg LLVMValueRef ret = 3638b8e80941Smrg ac_build_intrinsic(ctx, name, 3639b8e80941Smrg LLVMTypeOf(src), (LLVMValueRef []) { 3640b8e80941Smrg src, inactive }, 2, 3641b8e80941Smrg AC_FUNC_ATTR_READNONE | 3642b8e80941Smrg AC_FUNC_ATTR_CONVERGENT); 3643b8e80941Smrg return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); 3644b8e80941Smrg} 3645b8e80941Smrg 3646b8e80941Smrgstatic LLVMValueRef 3647b8e80941Smrgget_reduction_identity(struct ac_llvm_context *ctx, nir_op op, unsigned type_size) 3648b8e80941Smrg{ 3649b8e80941Smrg if (type_size == 4) { 3650b8e80941Smrg switch (op) { 3651b8e80941Smrg case nir_op_iadd: return ctx->i32_0; 3652b8e80941Smrg case nir_op_fadd: return ctx->f32_0; 3653b8e80941Smrg case nir_op_imul: return ctx->i32_1; 3654b8e80941Smrg case nir_op_fmul: return ctx->f32_1; 3655b8e80941Smrg case nir_op_imin: return LLVMConstInt(ctx->i32, INT32_MAX, 0); 3656b8e80941Smrg case nir_op_umin: return LLVMConstInt(ctx->i32, UINT32_MAX, 0); 3657b8e80941Smrg case nir_op_fmin: return LLVMConstReal(ctx->f32, INFINITY); 3658b8e80941Smrg case nir_op_imax: return LLVMConstInt(ctx->i32, INT32_MIN, 0); 3659b8e80941Smrg case nir_op_umax: return ctx->i32_0; 3660b8e80941Smrg case nir_op_fmax: return LLVMConstReal(ctx->f32, -INFINITY); 3661b8e80941Smrg case nir_op_iand: return LLVMConstInt(ctx->i32, -1, 0); 3662b8e80941Smrg case nir_op_ior: return ctx->i32_0; 3663b8e80941Smrg case nir_op_ixor: return ctx->i32_0; 3664b8e80941Smrg default: 3665b8e80941Smrg unreachable("bad reduction intrinsic"); 3666b8e80941Smrg } 3667b8e80941Smrg } else { /* type_size == 64bit */ 3668b8e80941Smrg switch (op) { 3669b8e80941Smrg case nir_op_iadd: return ctx->i64_0; 3670b8e80941Smrg case nir_op_fadd: return ctx->f64_0; 3671b8e80941Smrg case nir_op_imul: return ctx->i64_1; 3672b8e80941Smrg case nir_op_fmul: return ctx->f64_1; 3673b8e80941Smrg case nir_op_imin: return LLVMConstInt(ctx->i64, INT64_MAX, 0); 3674b8e80941Smrg case nir_op_umin: return LLVMConstInt(ctx->i64, UINT64_MAX, 0); 3675b8e80941Smrg case nir_op_fmin: return LLVMConstReal(ctx->f64, INFINITY); 3676b8e80941Smrg case nir_op_imax: return LLVMConstInt(ctx->i64, INT64_MIN, 0); 3677b8e80941Smrg case nir_op_umax: return ctx->i64_0; 3678b8e80941Smrg case nir_op_fmax: return LLVMConstReal(ctx->f64, -INFINITY); 3679b8e80941Smrg case nir_op_iand: return LLVMConstInt(ctx->i64, -1, 0); 3680b8e80941Smrg case nir_op_ior: return ctx->i64_0; 3681b8e80941Smrg case nir_op_ixor: return ctx->i64_0; 3682b8e80941Smrg default: 3683b8e80941Smrg unreachable("bad reduction intrinsic"); 3684b8e80941Smrg } 3685b8e80941Smrg } 3686b8e80941Smrg} 3687b8e80941Smrg 3688b8e80941Smrgstatic LLVMValueRef 3689b8e80941Smrgac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, nir_op op) 3690b8e80941Smrg{ 3691b8e80941Smrg bool _64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8; 3692b8e80941Smrg switch (op) { 3693b8e80941Smrg case nir_op_iadd: return LLVMBuildAdd(ctx->builder, lhs, rhs, ""); 3694b8e80941Smrg case nir_op_fadd: return LLVMBuildFAdd(ctx->builder, lhs, rhs, ""); 3695b8e80941Smrg case nir_op_imul: return LLVMBuildMul(ctx->builder, lhs, rhs, ""); 3696b8e80941Smrg case nir_op_fmul: return LLVMBuildFMul(ctx->builder, lhs, rhs, ""); 3697b8e80941Smrg case nir_op_imin: return LLVMBuildSelect(ctx->builder, 3698b8e80941Smrg LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""), 3699b8e80941Smrg lhs, rhs, ""); 3700b8e80941Smrg case nir_op_umin: return LLVMBuildSelect(ctx->builder, 3701b8e80941Smrg LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""), 3702b8e80941Smrg lhs, rhs, ""); 3703b8e80941Smrg case nir_op_fmin: return ac_build_intrinsic(ctx, 3704b8e80941Smrg _64bit ? "llvm.minnum.f64" : "llvm.minnum.f32", 3705b8e80941Smrg _64bit ? ctx->f64 : ctx->f32, 3706b8e80941Smrg (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE); 3707b8e80941Smrg case nir_op_imax: return LLVMBuildSelect(ctx->builder, 3708b8e80941Smrg LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""), 3709b8e80941Smrg lhs, rhs, ""); 3710b8e80941Smrg case nir_op_umax: return LLVMBuildSelect(ctx->builder, 3711b8e80941Smrg LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""), 3712b8e80941Smrg lhs, rhs, ""); 3713b8e80941Smrg case nir_op_fmax: return ac_build_intrinsic(ctx, 3714b8e80941Smrg _64bit ? "llvm.maxnum.f64" : "llvm.maxnum.f32", 3715b8e80941Smrg _64bit ? ctx->f64 : ctx->f32, 3716b8e80941Smrg (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE); 3717b8e80941Smrg case nir_op_iand: return LLVMBuildAnd(ctx->builder, lhs, rhs, ""); 3718b8e80941Smrg case nir_op_ior: return LLVMBuildOr(ctx->builder, lhs, rhs, ""); 3719b8e80941Smrg case nir_op_ixor: return LLVMBuildXor(ctx->builder, lhs, rhs, ""); 3720b8e80941Smrg default: 3721b8e80941Smrg unreachable("bad reduction intrinsic"); 3722b8e80941Smrg } 3723b8e80941Smrg} 3724b8e80941Smrg 3725b8e80941Smrg/** 3726b8e80941Smrg * \param maxprefix specifies that the result only needs to be correct for a 3727b8e80941Smrg * prefix of this many threads 3728b8e80941Smrg * 3729b8e80941Smrg * TODO: add inclusive and excluse scan functions for SI chip class. 3730b8e80941Smrg */ 3731b8e80941Smrgstatic LLVMValueRef 3732b8e80941Smrgac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity, 3733b8e80941Smrg unsigned maxprefix) 3734b8e80941Smrg{ 3735b8e80941Smrg LLVMValueRef result, tmp; 3736b8e80941Smrg result = src; 3737b8e80941Smrg if (maxprefix <= 1) 3738b8e80941Smrg return result; 3739b8e80941Smrg tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false); 3740b8e80941Smrg result = ac_build_alu_op(ctx, result, tmp, op); 3741b8e80941Smrg if (maxprefix <= 2) 3742b8e80941Smrg return result; 3743b8e80941Smrg tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false); 3744b8e80941Smrg result = ac_build_alu_op(ctx, result, tmp, op); 3745b8e80941Smrg if (maxprefix <= 3) 3746b8e80941Smrg return result; 3747b8e80941Smrg tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false); 3748b8e80941Smrg result = ac_build_alu_op(ctx, result, tmp, op); 3749b8e80941Smrg if (maxprefix <= 4) 3750b8e80941Smrg return result; 3751b8e80941Smrg tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false); 3752b8e80941Smrg result = ac_build_alu_op(ctx, result, tmp, op); 3753b8e80941Smrg if (maxprefix <= 8) 3754b8e80941Smrg return result; 3755b8e80941Smrg tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false); 3756b8e80941Smrg result = ac_build_alu_op(ctx, result, tmp, op); 3757b8e80941Smrg if (maxprefix <= 16) 3758b8e80941Smrg return result; 3759b8e80941Smrg tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); 3760b8e80941Smrg result = ac_build_alu_op(ctx, result, tmp, op); 3761b8e80941Smrg if (maxprefix <= 32) 3762b8e80941Smrg return result; 3763b8e80941Smrg tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); 3764b8e80941Smrg result = ac_build_alu_op(ctx, result, tmp, op); 3765b8e80941Smrg return result; 3766b8e80941Smrg} 3767b8e80941Smrg 3768b8e80941SmrgLLVMValueRef 3769b8e80941Smrgac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) 3770b8e80941Smrg{ 3771b8e80941Smrg LLVMValueRef result; 3772b8e80941Smrg 3773b8e80941Smrg if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { 3774b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 3775b8e80941Smrg src = LLVMBuildZExt(builder, src, ctx->i32, ""); 3776b8e80941Smrg result = ac_build_ballot(ctx, src); 3777b8e80941Smrg result = ac_build_mbcnt(ctx, result); 3778b8e80941Smrg result = LLVMBuildAdd(builder, result, src, ""); 3779b8e80941Smrg return result; 3780b8e80941Smrg } 3781b8e80941Smrg 3782b8e80941Smrg ac_build_optimization_barrier(ctx, &src); 3783b8e80941Smrg 3784b8e80941Smrg LLVMValueRef identity = 3785b8e80941Smrg get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); 3786b8e80941Smrg result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), 3787b8e80941Smrg LLVMTypeOf(identity), ""); 3788b8e80941Smrg result = ac_build_scan(ctx, op, result, identity, 64); 3789b8e80941Smrg 3790b8e80941Smrg return ac_build_wwm(ctx, result); 3791b8e80941Smrg} 3792b8e80941Smrg 3793b8e80941SmrgLLVMValueRef 3794b8e80941Smrgac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) 3795b8e80941Smrg{ 3796b8e80941Smrg LLVMValueRef result; 3797b8e80941Smrg 3798b8e80941Smrg if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { 3799b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 3800b8e80941Smrg src = LLVMBuildZExt(builder, src, ctx->i32, ""); 3801b8e80941Smrg result = ac_build_ballot(ctx, src); 3802b8e80941Smrg result = ac_build_mbcnt(ctx, result); 3803b8e80941Smrg return result; 3804b8e80941Smrg } 3805b8e80941Smrg 3806b8e80941Smrg ac_build_optimization_barrier(ctx, &src); 3807b8e80941Smrg 3808b8e80941Smrg LLVMValueRef identity = 3809b8e80941Smrg get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); 3810b8e80941Smrg result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), 3811b8e80941Smrg LLVMTypeOf(identity), ""); 3812b8e80941Smrg result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 0xf, false); 3813b8e80941Smrg result = ac_build_scan(ctx, op, result, identity, 64); 3814b8e80941Smrg 3815b8e80941Smrg return ac_build_wwm(ctx, result); 3816b8e80941Smrg} 3817b8e80941Smrg 3818b8e80941SmrgLLVMValueRef 3819b8e80941Smrgac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size) 3820b8e80941Smrg{ 3821b8e80941Smrg if (cluster_size == 1) return src; 3822b8e80941Smrg ac_build_optimization_barrier(ctx, &src); 3823b8e80941Smrg LLVMValueRef result, swap; 3824b8e80941Smrg LLVMValueRef identity = get_reduction_identity(ctx, op, 3825b8e80941Smrg ac_get_type_size(LLVMTypeOf(src))); 3826b8e80941Smrg result = LLVMBuildBitCast(ctx->builder, 3827b8e80941Smrg ac_build_set_inactive(ctx, src, identity), 3828b8e80941Smrg LLVMTypeOf(identity), ""); 3829b8e80941Smrg swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2); 3830b8e80941Smrg result = ac_build_alu_op(ctx, result, swap, op); 3831b8e80941Smrg if (cluster_size == 2) return ac_build_wwm(ctx, result); 3832b8e80941Smrg 3833b8e80941Smrg swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1); 3834b8e80941Smrg result = ac_build_alu_op(ctx, result, swap, op); 3835b8e80941Smrg if (cluster_size == 4) return ac_build_wwm(ctx, result); 3836b8e80941Smrg 3837b8e80941Smrg if (ctx->chip_class >= VI) 3838b8e80941Smrg swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false); 3839b8e80941Smrg else 3840b8e80941Smrg swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04)); 3841b8e80941Smrg result = ac_build_alu_op(ctx, result, swap, op); 3842b8e80941Smrg if (cluster_size == 8) return ac_build_wwm(ctx, result); 3843b8e80941Smrg 3844b8e80941Smrg if (ctx->chip_class >= VI) 3845b8e80941Smrg swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false); 3846b8e80941Smrg else 3847b8e80941Smrg swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08)); 3848b8e80941Smrg result = ac_build_alu_op(ctx, result, swap, op); 3849b8e80941Smrg if (cluster_size == 16) return ac_build_wwm(ctx, result); 3850b8e80941Smrg 3851b8e80941Smrg if (ctx->chip_class >= VI && cluster_size != 32) 3852b8e80941Smrg swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); 3853b8e80941Smrg else 3854b8e80941Smrg swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10)); 3855b8e80941Smrg result = ac_build_alu_op(ctx, result, swap, op); 3856b8e80941Smrg if (cluster_size == 32) return ac_build_wwm(ctx, result); 3857b8e80941Smrg 3858b8e80941Smrg if (ctx->chip_class >= VI) { 3859b8e80941Smrg swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); 3860b8e80941Smrg result = ac_build_alu_op(ctx, result, swap, op); 3861b8e80941Smrg result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0)); 3862b8e80941Smrg return ac_build_wwm(ctx, result); 3863b8e80941Smrg } else { 3864b8e80941Smrg swap = ac_build_readlane(ctx, result, ctx->i32_0); 3865b8e80941Smrg result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0)); 3866b8e80941Smrg result = ac_build_alu_op(ctx, result, swap, op); 3867b8e80941Smrg return ac_build_wwm(ctx, result); 3868b8e80941Smrg } 3869b8e80941Smrg} 3870b8e80941Smrg 3871b8e80941Smrg/** 3872b8e80941Smrg * "Top half" of a scan that reduces per-wave values across an entire 3873b8e80941Smrg * workgroup. 3874b8e80941Smrg * 3875b8e80941Smrg * The source value must be present in the highest lane of the wave, and the 3876b8e80941Smrg * highest lane must be live. 3877b8e80941Smrg */ 3878b8e80941Smrgvoid 3879b8e80941Smrgac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 3880b8e80941Smrg{ 3881b8e80941Smrg if (ws->maxwaves <= 1) 3882b8e80941Smrg return; 3883b8e80941Smrg 3884b8e80941Smrg const LLVMValueRef i32_63 = LLVMConstInt(ctx->i32, 63, false); 3885b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 3886b8e80941Smrg LLVMValueRef tid = ac_get_thread_id(ctx); 3887b8e80941Smrg LLVMValueRef tmp; 3888b8e80941Smrg 3889b8e80941Smrg tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, i32_63, ""); 3890b8e80941Smrg ac_build_ifcc(ctx, tmp, 1000); 3891b8e80941Smrg LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, "")); 3892b8e80941Smrg ac_build_endif(ctx, 1000); 3893b8e80941Smrg} 3894b8e80941Smrg 3895b8e80941Smrg/** 3896b8e80941Smrg * "Bottom half" of a scan that reduces per-wave values across an entire 3897b8e80941Smrg * workgroup. 3898b8e80941Smrg * 3899b8e80941Smrg * The caller must place a barrier between the top and bottom halves. 3900b8e80941Smrg */ 3901b8e80941Smrgvoid 3902b8e80941Smrgac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 3903b8e80941Smrg{ 3904b8e80941Smrg const LLVMTypeRef type = LLVMTypeOf(ws->src); 3905b8e80941Smrg const LLVMValueRef identity = 3906b8e80941Smrg get_reduction_identity(ctx, ws->op, ac_get_type_size(type)); 3907b8e80941Smrg 3908b8e80941Smrg if (ws->maxwaves <= 1) { 3909b8e80941Smrg ws->result_reduce = ws->src; 3910b8e80941Smrg ws->result_inclusive = ws->src; 3911b8e80941Smrg ws->result_exclusive = identity; 3912b8e80941Smrg return; 3913b8e80941Smrg } 3914b8e80941Smrg assert(ws->maxwaves <= 32); 3915b8e80941Smrg 3916b8e80941Smrg LLVMBuilderRef builder = ctx->builder; 3917b8e80941Smrg LLVMValueRef tid = ac_get_thread_id(ctx); 3918b8e80941Smrg LLVMBasicBlockRef bbs[2]; 3919b8e80941Smrg LLVMValueRef phivalues_scan[2]; 3920b8e80941Smrg LLVMValueRef tmp, tmp2; 3921b8e80941Smrg 3922b8e80941Smrg bbs[0] = LLVMGetInsertBlock(builder); 3923b8e80941Smrg phivalues_scan[0] = LLVMGetUndef(type); 3924b8e80941Smrg 3925b8e80941Smrg if (ws->enable_reduce) 3926b8e80941Smrg tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, ""); 3927b8e80941Smrg else if (ws->enable_inclusive) 3928b8e80941Smrg tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, ""); 3929b8e80941Smrg else 3930b8e80941Smrg tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, ""); 3931b8e80941Smrg ac_build_ifcc(ctx, tmp, 1001); 3932b8e80941Smrg { 3933b8e80941Smrg tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), ""); 3934b8e80941Smrg 3935b8e80941Smrg ac_build_optimization_barrier(ctx, &tmp); 3936b8e80941Smrg 3937b8e80941Smrg bbs[1] = LLVMGetInsertBlock(builder); 3938b8e80941Smrg phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves); 3939b8e80941Smrg } 3940b8e80941Smrg ac_build_endif(ctx, 1001); 3941b8e80941Smrg 3942b8e80941Smrg const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs); 3943b8e80941Smrg 3944b8e80941Smrg if (ws->enable_reduce) { 3945b8e80941Smrg tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, ""); 3946b8e80941Smrg ws->result_reduce = ac_build_readlane(ctx, scan, tmp); 3947b8e80941Smrg } 3948b8e80941Smrg if (ws->enable_inclusive) 3949b8e80941Smrg ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx); 3950b8e80941Smrg if (ws->enable_exclusive) { 3951b8e80941Smrg tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, ""); 3952b8e80941Smrg tmp = ac_build_readlane(ctx, scan, tmp); 3953b8e80941Smrg tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, ""); 3954b8e80941Smrg ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, ""); 3955b8e80941Smrg } 3956b8e80941Smrg} 3957b8e80941Smrg 3958b8e80941Smrg/** 3959b8e80941Smrg * Inclusive scan of a per-wave value across an entire workgroup. 3960b8e80941Smrg * 3961b8e80941Smrg * This implies an s_barrier instruction. 3962b8e80941Smrg * 3963b8e80941Smrg * Unlike ac_build_inclusive_scan, the caller \em must ensure that all threads 3964b8e80941Smrg * of the workgroup are live. (This requirement cannot easily be relaxed in a 3965b8e80941Smrg * useful manner because of the barrier in the algorithm.) 3966b8e80941Smrg */ 3967b8e80941Smrgvoid 3968b8e80941Smrgac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 3969b8e80941Smrg{ 3970b8e80941Smrg ac_build_wg_wavescan_top(ctx, ws); 3971b8e80941Smrg ac_build_s_barrier(ctx); 3972b8e80941Smrg ac_build_wg_wavescan_bottom(ctx, ws); 3973b8e80941Smrg} 3974b8e80941Smrg 3975b8e80941Smrg/** 3976b8e80941Smrg * "Top half" of a scan that reduces per-thread values across an entire 3977b8e80941Smrg * workgroup. 3978b8e80941Smrg * 3979b8e80941Smrg * All lanes must be active when this code runs. 3980b8e80941Smrg */ 3981b8e80941Smrgvoid 3982b8e80941Smrgac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 3983b8e80941Smrg{ 3984b8e80941Smrg if (ws->enable_exclusive) { 3985b8e80941Smrg ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op); 3986b8e80941Smrg if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd) 3987b8e80941Smrg ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, ""); 3988b8e80941Smrg ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op); 3989b8e80941Smrg } else { 3990b8e80941Smrg ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op); 3991b8e80941Smrg } 3992b8e80941Smrg 3993b8e80941Smrg bool enable_inclusive = ws->enable_inclusive; 3994b8e80941Smrg bool enable_exclusive = ws->enable_exclusive; 3995b8e80941Smrg ws->enable_inclusive = false; 3996b8e80941Smrg ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; 3997b8e80941Smrg ac_build_wg_wavescan_top(ctx, ws); 3998b8e80941Smrg ws->enable_inclusive = enable_inclusive; 3999b8e80941Smrg ws->enable_exclusive = enable_exclusive; 4000b8e80941Smrg} 4001b8e80941Smrg 4002b8e80941Smrg/** 4003b8e80941Smrg * "Bottom half" of a scan that reduces per-thread values across an entire 4004b8e80941Smrg * workgroup. 4005b8e80941Smrg * 4006b8e80941Smrg * The caller must place a barrier between the top and bottom halves. 4007b8e80941Smrg */ 4008b8e80941Smrgvoid 4009b8e80941Smrgac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 4010b8e80941Smrg{ 4011b8e80941Smrg bool enable_inclusive = ws->enable_inclusive; 4012b8e80941Smrg bool enable_exclusive = ws->enable_exclusive; 4013b8e80941Smrg ws->enable_inclusive = false; 4014b8e80941Smrg ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; 4015b8e80941Smrg ac_build_wg_wavescan_bottom(ctx, ws); 4016b8e80941Smrg ws->enable_inclusive = enable_inclusive; 4017b8e80941Smrg ws->enable_exclusive = enable_exclusive; 4018b8e80941Smrg 4019b8e80941Smrg /* ws->result_reduce is already the correct value */ 4020b8e80941Smrg if (ws->enable_inclusive) 4021b8e80941Smrg ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op); 4022b8e80941Smrg if (ws->enable_exclusive) 4023b8e80941Smrg ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op); 4024b8e80941Smrg} 4025b8e80941Smrg 4026b8e80941Smrg/** 4027b8e80941Smrg * A scan that reduces per-thread values across an entire workgroup. 4028b8e80941Smrg * 4029b8e80941Smrg * The caller must ensure that all lanes are active when this code runs 4030b8e80941Smrg * (WWM is insufficient!), because there is an implied barrier. 4031b8e80941Smrg */ 4032b8e80941Smrgvoid 4033b8e80941Smrgac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) 4034b8e80941Smrg{ 4035b8e80941Smrg ac_build_wg_scan_top(ctx, ws); 4036b8e80941Smrg ac_build_s_barrier(ctx); 4037b8e80941Smrg ac_build_wg_scan_bottom(ctx, ws); 4038b8e80941Smrg} 4039b8e80941Smrg 4040b8e80941SmrgLLVMValueRef 4041b8e80941Smrgac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, 4042b8e80941Smrg unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3) 4043b8e80941Smrg{ 4044b8e80941Smrg unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3); 4045b8e80941Smrg if (ctx->chip_class >= VI) { 4046b8e80941Smrg return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false); 4047b8e80941Smrg } else { 4048b8e80941Smrg return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask); 4049b8e80941Smrg } 4050b8e80941Smrg} 4051b8e80941Smrg 4052b8e80941SmrgLLVMValueRef 4053b8e80941Smrgac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index) 4054b8e80941Smrg{ 4055b8e80941Smrg index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); 4056b8e80941Smrg return ac_build_intrinsic(ctx, 4057b8e80941Smrg "llvm.amdgcn.ds.bpermute", ctx->i32, 4058b8e80941Smrg (LLVMValueRef []) {index, src}, 2, 4059b8e80941Smrg AC_FUNC_ATTR_READNONE | 4060b8e80941Smrg AC_FUNC_ATTR_CONVERGENT); 4061b8e80941Smrg} 4062b8e80941Smrg 4063b8e80941SmrgLLVMValueRef 4064b8e80941Smrgac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0, 4065b8e80941Smrg unsigned bitsize) 4066b8e80941Smrg{ 4067b8e80941Smrg LLVMTypeRef type; 4068b8e80941Smrg char *intr; 4069b8e80941Smrg 4070b8e80941Smrg if (bitsize == 16) { 4071b8e80941Smrg intr = "llvm.amdgcn.frexp.exp.i16.f16"; 4072b8e80941Smrg type = ctx->i16; 4073b8e80941Smrg } else if (bitsize == 32) { 4074b8e80941Smrg intr = "llvm.amdgcn.frexp.exp.i32.f32"; 4075b8e80941Smrg type = ctx->i32; 4076b8e80941Smrg } else { 4077b8e80941Smrg intr = "llvm.amdgcn.frexp.exp.i32.f64"; 4078b8e80941Smrg type = ctx->i32; 4079b8e80941Smrg } 4080b8e80941Smrg 4081b8e80941Smrg LLVMValueRef params[] = { 4082b8e80941Smrg src0, 4083b8e80941Smrg }; 4084b8e80941Smrg return ac_build_intrinsic(ctx, intr, type, params, 1, 4085b8e80941Smrg AC_FUNC_ATTR_READNONE); 4086b8e80941Smrg} 4087b8e80941SmrgLLVMValueRef 4088b8e80941Smrgac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, 4089b8e80941Smrg unsigned bitsize) 4090b8e80941Smrg{ 4091b8e80941Smrg LLVMTypeRef type; 4092b8e80941Smrg char *intr; 4093b8e80941Smrg 4094b8e80941Smrg if (bitsize == 16) { 4095b8e80941Smrg intr = "llvm.amdgcn.frexp.mant.f16"; 4096b8e80941Smrg type = ctx->f16; 4097b8e80941Smrg } else if (bitsize == 32) { 4098b8e80941Smrg intr = "llvm.amdgcn.frexp.mant.f32"; 4099b8e80941Smrg type = ctx->f32; 4100b8e80941Smrg } else { 4101b8e80941Smrg intr = "llvm.amdgcn.frexp.mant.f64"; 4102b8e80941Smrg type = ctx->f64; 4103b8e80941Smrg } 4104b8e80941Smrg 4105b8e80941Smrg LLVMValueRef params[] = { 4106b8e80941Smrg src0, 4107b8e80941Smrg }; 4108b8e80941Smrg return ac_build_intrinsic(ctx, intr, type, params, 1, 4109b8e80941Smrg AC_FUNC_ATTR_READNONE); 4110b8e80941Smrg} 4111b8e80941Smrg 4112b8e80941Smrg/* 4113b8e80941Smrg * this takes an I,J coordinate pair, 4114b8e80941Smrg * and works out the X and Y derivatives. 4115b8e80941Smrg * it returns DDX(I), DDX(J), DDY(I), DDY(J). 4116b8e80941Smrg */ 4117b8e80941SmrgLLVMValueRef 4118b8e80941Smrgac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij) 4119b8e80941Smrg{ 4120b8e80941Smrg LLVMValueRef result[4], a; 4121b8e80941Smrg unsigned i; 4122b8e80941Smrg 4123b8e80941Smrg for (i = 0; i < 2; i++) { 4124b8e80941Smrg a = LLVMBuildExtractElement(ctx->builder, interp_ij, 4125b8e80941Smrg LLVMConstInt(ctx->i32, i, false), ""); 4126b8e80941Smrg result[i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, a); 4127b8e80941Smrg result[2+i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, a); 4128b8e80941Smrg } 4129b8e80941Smrg return ac_build_gather_values(ctx, result, 4); 4130b8e80941Smrg} 4131b8e80941Smrg 4132b8e80941SmrgLLVMValueRef 4133b8e80941Smrgac_build_load_helper_invocation(struct ac_llvm_context *ctx) 4134b8e80941Smrg{ 4135b8e80941Smrg LLVMValueRef result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", 4136b8e80941Smrg ctx->i1, NULL, 0, 4137b8e80941Smrg AC_FUNC_ATTR_READNONE); 4138b8e80941Smrg result = LLVMBuildNot(ctx->builder, result, ""); 4139b8e80941Smrg return LLVMBuildSExt(ctx->builder, result, ctx->i32, ""); 4140b8e80941Smrg} 4141