17ec681f3Smrg/*
27ec681f3Smrg * Copyright 2014 Advanced Micro Devices, Inc.
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the
67ec681f3Smrg * "Software"), to deal in the Software without restriction, including
77ec681f3Smrg * without limitation the rights to use, copy, modify, merge, publish,
87ec681f3Smrg * distribute, sub license, and/or sell copies of the Software, and to
97ec681f3Smrg * permit persons to whom the Software is furnished to do so, subject to
107ec681f3Smrg * the following conditions:
117ec681f3Smrg *
127ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
137ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
147ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
157ec681f3Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
167ec681f3Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
177ec681f3Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
187ec681f3Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
197ec681f3Smrg *
207ec681f3Smrg * The above copyright notice and this permission notice (including the
217ec681f3Smrg * next paragraph) shall be included in all copies or substantial portions
227ec681f3Smrg * of the Software.
237ec681f3Smrg *
247ec681f3Smrg */
257ec681f3Smrg/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
267ec681f3Smrg#include "ac_llvm_build.h"
277ec681f3Smrg
287ec681f3Smrg#include "ac_exp_param.h"
297ec681f3Smrg#include "ac_llvm_util.h"
307ec681f3Smrg#include "ac_shader_util.h"
317ec681f3Smrg#include "c11/threads.h"
327ec681f3Smrg#include "shader_enums.h"
337ec681f3Smrg#include "sid.h"
347ec681f3Smrg#include "util/bitscan.h"
357ec681f3Smrg#include "util/macros.h"
367ec681f3Smrg#include "util/u_atomic.h"
377ec681f3Smrg#include "util/u_math.h"
387ec681f3Smrg#include <llvm-c/Core.h>
397ec681f3Smrg#include <llvm/Config/llvm-config.h>
407ec681f3Smrg
417ec681f3Smrg#include <assert.h>
427ec681f3Smrg#include <stdio.h>
437ec681f3Smrg
447ec681f3Smrg#define AC_LLVM_INITIAL_CF_DEPTH 4
457ec681f3Smrg
467ec681f3Smrg/* Data for if/else/endif and bgnloop/endloop control flow structures.
477ec681f3Smrg */
487ec681f3Smrgstruct ac_llvm_flow {
497ec681f3Smrg   /* Loop exit or next part of if/else/endif. */
507ec681f3Smrg   LLVMBasicBlockRef next_block;
517ec681f3Smrg   LLVMBasicBlockRef loop_entry_block;
527ec681f3Smrg};
537ec681f3Smrg
547ec681f3Smrg/* Initialize module-independent parts of the context.
557ec681f3Smrg *
567ec681f3Smrg * The caller is responsible for initializing ctx::module and ctx::builder.
577ec681f3Smrg */
587ec681f3Smrgvoid ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler,
597ec681f3Smrg                          enum chip_class chip_class, enum radeon_family family,
607ec681f3Smrg                          const struct radeon_info *info,
617ec681f3Smrg                          enum ac_float_mode float_mode, unsigned wave_size,
627ec681f3Smrg                          unsigned ballot_mask_bits)
637ec681f3Smrg{
647ec681f3Smrg   ctx->context = LLVMContextCreate();
657ec681f3Smrg
667ec681f3Smrg   ctx->chip_class = chip_class;
677ec681f3Smrg   ctx->family = family;
687ec681f3Smrg   ctx->info = info;
697ec681f3Smrg   ctx->wave_size = wave_size;
707ec681f3Smrg   ctx->ballot_mask_bits = ballot_mask_bits;
717ec681f3Smrg   ctx->float_mode = float_mode;
727ec681f3Smrg   ctx->module = ac_create_module(compiler->tm, ctx->context);
737ec681f3Smrg   ctx->builder = ac_create_builder(ctx->context, float_mode);
747ec681f3Smrg
757ec681f3Smrg   ctx->voidt = LLVMVoidTypeInContext(ctx->context);
767ec681f3Smrg   ctx->i1 = LLVMInt1TypeInContext(ctx->context);
777ec681f3Smrg   ctx->i8 = LLVMInt8TypeInContext(ctx->context);
787ec681f3Smrg   ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
797ec681f3Smrg   ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
807ec681f3Smrg   ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
817ec681f3Smrg   ctx->i128 = LLVMIntTypeInContext(ctx->context, 128);
827ec681f3Smrg   ctx->intptr = ctx->i32;
837ec681f3Smrg   ctx->f16 = LLVMHalfTypeInContext(ctx->context);
847ec681f3Smrg   ctx->f32 = LLVMFloatTypeInContext(ctx->context);
857ec681f3Smrg   ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
867ec681f3Smrg   ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
877ec681f3Smrg   ctx->v4i16 = LLVMVectorType(ctx->i16, 4);
887ec681f3Smrg   ctx->v2f16 = LLVMVectorType(ctx->f16, 2);
897ec681f3Smrg   ctx->v4f16 = LLVMVectorType(ctx->f16, 4);
907ec681f3Smrg   ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
917ec681f3Smrg   ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
927ec681f3Smrg   ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
937ec681f3Smrg   ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
947ec681f3Smrg   ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
957ec681f3Smrg   ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
967ec681f3Smrg   ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
977ec681f3Smrg   ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
987ec681f3Smrg   ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits);
997ec681f3Smrg
1007ec681f3Smrg   ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
1017ec681f3Smrg   ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
1027ec681f3Smrg   ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
1037ec681f3Smrg   ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
1047ec681f3Smrg   ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
1057ec681f3Smrg   ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
1067ec681f3Smrg   ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
1077ec681f3Smrg   ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
1087ec681f3Smrg   ctx->i128_0 = LLVMConstInt(ctx->i128, 0, false);
1097ec681f3Smrg   ctx->i128_1 = LLVMConstInt(ctx->i128, 1, false);
1107ec681f3Smrg   ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
1117ec681f3Smrg   ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
1127ec681f3Smrg   ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
1137ec681f3Smrg   ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
1147ec681f3Smrg   ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
1157ec681f3Smrg   ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0);
1167ec681f3Smrg
1177ec681f3Smrg   ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
1187ec681f3Smrg   ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
1197ec681f3Smrg
1207ec681f3Smrg   ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, "range", 5);
1217ec681f3Smrg
1227ec681f3Smrg   ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, "invariant.load", 14);
1237ec681f3Smrg
1247ec681f3Smrg   ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
1257ec681f3Smrg
1267ec681f3Smrg   ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
1277ec681f3Smrg   ctx->flow = calloc(1, sizeof(*ctx->flow));
1287ec681f3Smrg}
1297ec681f3Smrg
1307ec681f3Smrgvoid ac_llvm_context_dispose(struct ac_llvm_context *ctx)
1317ec681f3Smrg{
1327ec681f3Smrg   free(ctx->flow->stack);
1337ec681f3Smrg   free(ctx->flow);
1347ec681f3Smrg   ctx->flow = NULL;
1357ec681f3Smrg}
1367ec681f3Smrg
1377ec681f3Smrgint ac_get_llvm_num_components(LLVMValueRef value)
1387ec681f3Smrg{
1397ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(value);
1407ec681f3Smrg   unsigned num_components =
1417ec681f3Smrg      LLVMGetTypeKind(type) == LLVMVectorTypeKind ? LLVMGetVectorSize(type) : 1;
1427ec681f3Smrg   return num_components;
1437ec681f3Smrg}
1447ec681f3Smrg
1457ec681f3SmrgLLVMValueRef ac_llvm_extract_elem(struct ac_llvm_context *ac, LLVMValueRef value, int index)
1467ec681f3Smrg{
1477ec681f3Smrg   if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
1487ec681f3Smrg      assert(index == 0);
1497ec681f3Smrg      return value;
1507ec681f3Smrg   }
1517ec681f3Smrg
1527ec681f3Smrg   return LLVMBuildExtractElement(ac->builder, value, LLVMConstInt(ac->i32, index, false), "");
1537ec681f3Smrg}
1547ec681f3Smrg
1557ec681f3Smrgint ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
1567ec681f3Smrg{
1577ec681f3Smrg   if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
1587ec681f3Smrg      type = LLVMGetElementType(type);
1597ec681f3Smrg
1607ec681f3Smrg   if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
1617ec681f3Smrg      return LLVMGetIntTypeWidth(type);
1627ec681f3Smrg
1637ec681f3Smrg   if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
1647ec681f3Smrg      if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_LDS)
1657ec681f3Smrg         return 32;
1667ec681f3Smrg   }
1677ec681f3Smrg
1687ec681f3Smrg   if (type == ctx->f16)
1697ec681f3Smrg      return 16;
1707ec681f3Smrg   if (type == ctx->f32)
1717ec681f3Smrg      return 32;
1727ec681f3Smrg   if (type == ctx->f64)
1737ec681f3Smrg      return 64;
1747ec681f3Smrg
1757ec681f3Smrg   unreachable("Unhandled type kind in get_elem_bits");
1767ec681f3Smrg}
1777ec681f3Smrg
1787ec681f3Smrgunsigned ac_get_type_size(LLVMTypeRef type)
1797ec681f3Smrg{
1807ec681f3Smrg   LLVMTypeKind kind = LLVMGetTypeKind(type);
1817ec681f3Smrg
1827ec681f3Smrg   switch (kind) {
1837ec681f3Smrg   case LLVMIntegerTypeKind:
1847ec681f3Smrg      return LLVMGetIntTypeWidth(type) / 8;
1857ec681f3Smrg   case LLVMHalfTypeKind:
1867ec681f3Smrg      return 2;
1877ec681f3Smrg   case LLVMFloatTypeKind:
1887ec681f3Smrg      return 4;
1897ec681f3Smrg   case LLVMDoubleTypeKind:
1907ec681f3Smrg      return 8;
1917ec681f3Smrg   case LLVMPointerTypeKind:
1927ec681f3Smrg      if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT)
1937ec681f3Smrg         return 4;
1947ec681f3Smrg      return 8;
1957ec681f3Smrg   case LLVMVectorTypeKind:
1967ec681f3Smrg      return LLVMGetVectorSize(type) * ac_get_type_size(LLVMGetElementType(type));
1977ec681f3Smrg   case LLVMArrayTypeKind:
1987ec681f3Smrg      return LLVMGetArrayLength(type) * ac_get_type_size(LLVMGetElementType(type));
1997ec681f3Smrg   default:
2007ec681f3Smrg      assert(0);
2017ec681f3Smrg      return 0;
2027ec681f3Smrg   }
2037ec681f3Smrg}
2047ec681f3Smrg
2057ec681f3Smrgstatic LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
2067ec681f3Smrg{
2077ec681f3Smrg   if (t == ctx->i1)
2087ec681f3Smrg      return ctx->i1;
2097ec681f3Smrg   else if (t == ctx->i8)
2107ec681f3Smrg      return ctx->i8;
2117ec681f3Smrg   else if (t == ctx->f16 || t == ctx->i16)
2127ec681f3Smrg      return ctx->i16;
2137ec681f3Smrg   else if (t == ctx->f32 || t == ctx->i32)
2147ec681f3Smrg      return ctx->i32;
2157ec681f3Smrg   else if (t == ctx->f64 || t == ctx->i64)
2167ec681f3Smrg      return ctx->i64;
2177ec681f3Smrg   else
2187ec681f3Smrg      unreachable("Unhandled integer size");
2197ec681f3Smrg}
2207ec681f3Smrg
2217ec681f3SmrgLLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
2227ec681f3Smrg{
2237ec681f3Smrg   if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
2247ec681f3Smrg      LLVMTypeRef elem_type = LLVMGetElementType(t);
2257ec681f3Smrg      return LLVMVectorType(to_integer_type_scalar(ctx, elem_type), LLVMGetVectorSize(t));
2267ec681f3Smrg   }
2277ec681f3Smrg   if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) {
2287ec681f3Smrg      switch (LLVMGetPointerAddressSpace(t)) {
2297ec681f3Smrg      case AC_ADDR_SPACE_GLOBAL:
2307ec681f3Smrg         return ctx->i64;
2317ec681f3Smrg      case AC_ADDR_SPACE_CONST_32BIT:
2327ec681f3Smrg      case AC_ADDR_SPACE_LDS:
2337ec681f3Smrg         return ctx->i32;
2347ec681f3Smrg      default:
2357ec681f3Smrg         unreachable("unhandled address space");
2367ec681f3Smrg      }
2377ec681f3Smrg   }
2387ec681f3Smrg   return to_integer_type_scalar(ctx, t);
2397ec681f3Smrg}
2407ec681f3Smrg
2417ec681f3SmrgLLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
2427ec681f3Smrg{
2437ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(v);
2447ec681f3Smrg   if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
2457ec681f3Smrg      return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), "");
2467ec681f3Smrg   }
2477ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
2487ec681f3Smrg}
2497ec681f3Smrg
2507ec681f3SmrgLLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v)
2517ec681f3Smrg{
2527ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(v);
2537ec681f3Smrg   if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
2547ec681f3Smrg      return v;
2557ec681f3Smrg   return ac_to_integer(ctx, v);
2567ec681f3Smrg}
2577ec681f3Smrg
2587ec681f3Smrgstatic LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
2597ec681f3Smrg{
2607ec681f3Smrg   if (t == ctx->i8)
2617ec681f3Smrg      return ctx->i8;
2627ec681f3Smrg   else if (t == ctx->i16 || t == ctx->f16)
2637ec681f3Smrg      return ctx->f16;
2647ec681f3Smrg   else if (t == ctx->i32 || t == ctx->f32)
2657ec681f3Smrg      return ctx->f32;
2667ec681f3Smrg   else if (t == ctx->i64 || t == ctx->f64)
2677ec681f3Smrg      return ctx->f64;
2687ec681f3Smrg   else
2697ec681f3Smrg      unreachable("Unhandled float size");
2707ec681f3Smrg}
2717ec681f3Smrg
2727ec681f3SmrgLLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
2737ec681f3Smrg{
2747ec681f3Smrg   if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
2757ec681f3Smrg      LLVMTypeRef elem_type = LLVMGetElementType(t);
2767ec681f3Smrg      return LLVMVectorType(to_float_type_scalar(ctx, elem_type), LLVMGetVectorSize(t));
2777ec681f3Smrg   }
2787ec681f3Smrg   return to_float_type_scalar(ctx, t);
2797ec681f3Smrg}
2807ec681f3Smrg
2817ec681f3SmrgLLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
2827ec681f3Smrg{
2837ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(v);
2847ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), "");
2857ec681f3Smrg}
2867ec681f3Smrg
2877ec681f3SmrgLLVMValueRef ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
2887ec681f3Smrg                                LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count,
2897ec681f3Smrg                                unsigned attrib_mask)
2907ec681f3Smrg{
2917ec681f3Smrg   LLVMValueRef function, call;
2927ec681f3Smrg   bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
2937ec681f3Smrg
2947ec681f3Smrg   function = LLVMGetNamedFunction(ctx->module, name);
2957ec681f3Smrg   if (!function) {
2967ec681f3Smrg      LLVMTypeRef param_types[32], function_type;
2977ec681f3Smrg      unsigned i;
2987ec681f3Smrg
2997ec681f3Smrg      assert(param_count <= 32);
3007ec681f3Smrg
3017ec681f3Smrg      for (i = 0; i < param_count; ++i) {
3027ec681f3Smrg         assert(params[i]);
3037ec681f3Smrg         param_types[i] = LLVMTypeOf(params[i]);
3047ec681f3Smrg      }
3057ec681f3Smrg      function_type = LLVMFunctionType(return_type, param_types, param_count, 0);
3067ec681f3Smrg      function = LLVMAddFunction(ctx->module, name, function_type);
3077ec681f3Smrg
3087ec681f3Smrg      LLVMSetFunctionCallConv(function, LLVMCCallConv);
3097ec681f3Smrg      LLVMSetLinkage(function, LLVMExternalLinkage);
3107ec681f3Smrg
3117ec681f3Smrg      if (!set_callsite_attrs)
3127ec681f3Smrg         ac_add_func_attributes(ctx->context, function, attrib_mask);
3137ec681f3Smrg   }
3147ec681f3Smrg
3157ec681f3Smrg   call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
3167ec681f3Smrg   if (set_callsite_attrs)
3177ec681f3Smrg      ac_add_func_attributes(ctx->context, call, attrib_mask);
3187ec681f3Smrg   return call;
3197ec681f3Smrg}
3207ec681f3Smrg
3217ec681f3Smrg/**
3227ec681f3Smrg * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
3237ec681f3Smrg * intrinsic names).
3247ec681f3Smrg */
3257ec681f3Smrgvoid ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
3267ec681f3Smrg{
3277ec681f3Smrg   LLVMTypeRef elem_type = type;
3287ec681f3Smrg
3297ec681f3Smrg   if (LLVMGetTypeKind(type) == LLVMStructTypeKind) {
3307ec681f3Smrg      unsigned count = LLVMCountStructElementTypes(type);
3317ec681f3Smrg      int ret = snprintf(buf, bufsize, "sl_");
3327ec681f3Smrg      buf += ret;
3337ec681f3Smrg      bufsize -= ret;
3347ec681f3Smrg
3357ec681f3Smrg      LLVMTypeRef *elems = alloca(count * sizeof(LLVMTypeRef));
3367ec681f3Smrg      LLVMGetStructElementTypes(type, elems);
3377ec681f3Smrg
3387ec681f3Smrg      for (unsigned i = 0; i < count; i++) {
3397ec681f3Smrg         ac_build_type_name_for_intr(elems[i], buf, bufsize);
3407ec681f3Smrg         ret = strlen(buf);
3417ec681f3Smrg         buf += ret;
3427ec681f3Smrg         bufsize -= ret;
3437ec681f3Smrg      }
3447ec681f3Smrg
3457ec681f3Smrg      snprintf(buf, bufsize, "s");
3467ec681f3Smrg      return;
3477ec681f3Smrg   }
3487ec681f3Smrg
3497ec681f3Smrg   assert(bufsize >= 8);
3507ec681f3Smrg   if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
3517ec681f3Smrg      int ret = snprintf(buf, bufsize, "v%u", LLVMGetVectorSize(type));
3527ec681f3Smrg      if (ret < 0) {
3537ec681f3Smrg         char *type_name = LLVMPrintTypeToString(type);
3547ec681f3Smrg         fprintf(stderr, "Error building type name for: %s\n", type_name);
3557ec681f3Smrg         LLVMDisposeMessage(type_name);
3567ec681f3Smrg         return;
3577ec681f3Smrg      }
3587ec681f3Smrg      elem_type = LLVMGetElementType(type);
3597ec681f3Smrg      buf += ret;
3607ec681f3Smrg      bufsize -= ret;
3617ec681f3Smrg   }
3627ec681f3Smrg   switch (LLVMGetTypeKind(elem_type)) {
3637ec681f3Smrg   default:
3647ec681f3Smrg      break;
3657ec681f3Smrg   case LLVMIntegerTypeKind:
3667ec681f3Smrg      snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
3677ec681f3Smrg      break;
3687ec681f3Smrg   case LLVMHalfTypeKind:
3697ec681f3Smrg      snprintf(buf, bufsize, "f16");
3707ec681f3Smrg      break;
3717ec681f3Smrg   case LLVMFloatTypeKind:
3727ec681f3Smrg      snprintf(buf, bufsize, "f32");
3737ec681f3Smrg      break;
3747ec681f3Smrg   case LLVMDoubleTypeKind:
3757ec681f3Smrg      snprintf(buf, bufsize, "f64");
3767ec681f3Smrg      break;
3777ec681f3Smrg   }
3787ec681f3Smrg}
3797ec681f3Smrg
3807ec681f3Smrg/**
3817ec681f3Smrg * Helper function that builds an LLVM IR PHI node and immediately adds
3827ec681f3Smrg * incoming edges.
3837ec681f3Smrg */
3847ec681f3SmrgLLVMValueRef ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, unsigned count_incoming,
3857ec681f3Smrg                          LLVMValueRef *values, LLVMBasicBlockRef *blocks)
3867ec681f3Smrg{
3877ec681f3Smrg   LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
3887ec681f3Smrg   LLVMAddIncoming(phi, values, blocks, count_incoming);
3897ec681f3Smrg   return phi;
3907ec681f3Smrg}
3917ec681f3Smrg
3927ec681f3Smrgvoid ac_build_s_barrier(struct ac_llvm_context *ctx)
3937ec681f3Smrg{
3947ec681f3Smrg   ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
3957ec681f3Smrg}
3967ec681f3Smrg
3977ec681f3Smrg/* Prevent optimizations (at least of memory accesses) across the current
3987ec681f3Smrg * point in the program by emitting empty inline assembly that is marked as
3997ec681f3Smrg * having side effects.
4007ec681f3Smrg *
4017ec681f3Smrg * Optionally, a value can be passed through the inline assembly to prevent
4027ec681f3Smrg * LLVM from hoisting calls to ReadNone functions.
4037ec681f3Smrg */
4047ec681f3Smrgvoid ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pgpr, bool sgpr)
4057ec681f3Smrg{
4067ec681f3Smrg   static int counter = 0;
4077ec681f3Smrg
4087ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
4097ec681f3Smrg   char code[16];
4107ec681f3Smrg   const char *constraint = sgpr ? "=s,0" : "=v,0";
4117ec681f3Smrg
4127ec681f3Smrg   snprintf(code, sizeof(code), "; %d", (int)p_atomic_inc_return(&counter));
4137ec681f3Smrg
4147ec681f3Smrg   if (!pgpr) {
4157ec681f3Smrg      LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
4167ec681f3Smrg      LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
4177ec681f3Smrg      LLVMBuildCall(builder, inlineasm, NULL, 0, "");
4187ec681f3Smrg   } else if (LLVMTypeOf(*pgpr) == ctx->i32) {
4197ec681f3Smrg      /* Simple version for i32 that allows the caller to set LLVM metadata on the call
4207ec681f3Smrg       * instruction. */
4217ec681f3Smrg      LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
4227ec681f3Smrg      LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false);
4237ec681f3Smrg
4247ec681f3Smrg      *pgpr = LLVMBuildCall(builder, inlineasm, pgpr, 1, "");
4257ec681f3Smrg   } else if (LLVMTypeOf(*pgpr) == ctx->i16) {
4267ec681f3Smrg      /* Simple version for i16 that allows the caller to set LLVM metadata on the call
4277ec681f3Smrg       * instruction. */
4287ec681f3Smrg      LLVMTypeRef ftype = LLVMFunctionType(ctx->i16, &ctx->i16, 1, false);
4297ec681f3Smrg      LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false);
4307ec681f3Smrg
4317ec681f3Smrg      *pgpr = LLVMBuildCall(builder, inlineasm, pgpr, 1, "");
4327ec681f3Smrg   } else if (LLVMGetTypeKind(LLVMTypeOf(*pgpr)) == LLVMPointerTypeKind) {
4337ec681f3Smrg      LLVMTypeRef type = LLVMTypeOf(*pgpr);
4347ec681f3Smrg      LLVMTypeRef ftype = LLVMFunctionType(type, &type, 1, false);
4357ec681f3Smrg      LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false);
4367ec681f3Smrg
4377ec681f3Smrg      *pgpr = LLVMBuildCall(builder, inlineasm, pgpr, 1, "");
4387ec681f3Smrg   } else {
4397ec681f3Smrg      LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
4407ec681f3Smrg      LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, constraint, true, false);
4417ec681f3Smrg      LLVMTypeRef type = LLVMTypeOf(*pgpr);
4427ec681f3Smrg      unsigned bitsize = ac_get_elem_bits(ctx, type);
4437ec681f3Smrg      LLVMValueRef vgpr = *pgpr;
4447ec681f3Smrg      LLVMTypeRef vgpr_type;
4457ec681f3Smrg      unsigned vgpr_size;
4467ec681f3Smrg      LLVMValueRef vgpr0;
4477ec681f3Smrg
4487ec681f3Smrg      if (bitsize < 32)
4497ec681f3Smrg         vgpr = LLVMBuildZExt(ctx->builder, vgpr, ctx->i32, "");
4507ec681f3Smrg
4517ec681f3Smrg      vgpr_type = LLVMTypeOf(vgpr);
4527ec681f3Smrg      vgpr_size = ac_get_type_size(vgpr_type);
4537ec681f3Smrg
4547ec681f3Smrg      assert(vgpr_size % 4 == 0);
4557ec681f3Smrg
4567ec681f3Smrg      vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
4577ec681f3Smrg      vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
4587ec681f3Smrg      vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
4597ec681f3Smrg      vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
4607ec681f3Smrg      vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
4617ec681f3Smrg
4627ec681f3Smrg      if (bitsize < 32)
4637ec681f3Smrg         vgpr = LLVMBuildTrunc(builder, vgpr, type, "");
4647ec681f3Smrg
4657ec681f3Smrg      *pgpr = vgpr;
4667ec681f3Smrg   }
4677ec681f3Smrg}
4687ec681f3Smrg
4697ec681f3SmrgLLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope)
4707ec681f3Smrg{
4717ec681f3Smrg   const char *subgroup = "llvm.readcyclecounter";
4727ec681f3Smrg   const char *name = scope == NIR_SCOPE_DEVICE ? "llvm.amdgcn.s.memrealtime" : subgroup;
4737ec681f3Smrg
4747ec681f3Smrg   LLVMValueRef tmp = ac_build_intrinsic(ctx, name, ctx->i64, NULL, 0, 0);
4757ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, "");
4767ec681f3Smrg}
4777ec681f3Smrg
4787ec681f3SmrgLLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value)
4797ec681f3Smrg{
4807ec681f3Smrg   const char *name;
4817ec681f3Smrg
4827ec681f3Smrg   if (LLVMTypeOf(value) == ctx->i1)
4837ec681f3Smrg      value = LLVMBuildZExt(ctx->builder, value, ctx->i32, "");
4847ec681f3Smrg
4857ec681f3Smrg   if (ctx->wave_size == 64)
4867ec681f3Smrg      name = "llvm.amdgcn.icmp.i64.i32";
4877ec681f3Smrg   else
4887ec681f3Smrg      name = "llvm.amdgcn.icmp.i32.i32";
4897ec681f3Smrg
4907ec681f3Smrg   LLVMValueRef args[3] = {value, ctx->i32_0, LLVMConstInt(ctx->i32, LLVMIntNE, 0)};
4917ec681f3Smrg
4927ec681f3Smrg   /* We currently have no other way to prevent LLVM from lifting the icmp
4937ec681f3Smrg    * calls to a dominating basic block.
4947ec681f3Smrg    */
4957ec681f3Smrg   ac_build_optimization_barrier(ctx, &args[0], false);
4967ec681f3Smrg
4977ec681f3Smrg   args[0] = ac_to_integer(ctx, args[0]);
4987ec681f3Smrg
4997ec681f3Smrg   return ac_build_intrinsic(
5007ec681f3Smrg      ctx, name, ctx->iN_wavemask, args, 3,
5017ec681f3Smrg      AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
5027ec681f3Smrg}
5037ec681f3Smrg
5047ec681f3SmrgLLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, LLVMValueRef value)
5057ec681f3Smrg{
5067ec681f3Smrg   const char *name;
5077ec681f3Smrg
5087ec681f3Smrg   if (ctx->wave_size == 64)
5097ec681f3Smrg      name = "llvm.amdgcn.icmp.i64.i1";
5107ec681f3Smrg   else
5117ec681f3Smrg      name = "llvm.amdgcn.icmp.i32.i1";
5127ec681f3Smrg
5137ec681f3Smrg   LLVMValueRef args[3] = {
5147ec681f3Smrg      value,
5157ec681f3Smrg      ctx->i1false,
5167ec681f3Smrg      LLVMConstInt(ctx->i32, LLVMIntNE, 0),
5177ec681f3Smrg   };
5187ec681f3Smrg
5197ec681f3Smrg   return ac_build_intrinsic(
5207ec681f3Smrg      ctx, name, ctx->iN_wavemask, args, 3,
5217ec681f3Smrg      AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
5227ec681f3Smrg}
5237ec681f3Smrg
5247ec681f3SmrgLLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
5257ec681f3Smrg{
5267ec681f3Smrg   LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
5277ec681f3Smrg   LLVMValueRef vote_set = ac_build_ballot(ctx, value);
5287ec681f3Smrg   return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
5297ec681f3Smrg}
5307ec681f3Smrg
5317ec681f3SmrgLLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
5327ec681f3Smrg{
5337ec681f3Smrg   LLVMValueRef vote_set = ac_build_ballot(ctx, value);
5347ec681f3Smrg   return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, LLVMConstInt(ctx->iN_wavemask, 0, 0),
5357ec681f3Smrg                        "");
5367ec681f3Smrg}
5377ec681f3Smrg
5387ec681f3SmrgLLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
5397ec681f3Smrg{
5407ec681f3Smrg   LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
5417ec681f3Smrg   LLVMValueRef vote_set = ac_build_ballot(ctx, value);
5427ec681f3Smrg
5437ec681f3Smrg   LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
5447ec681f3Smrg   LLVMValueRef none =
5457ec681f3Smrg      LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
5467ec681f3Smrg   return LLVMBuildOr(ctx->builder, all, none, "");
5477ec681f3Smrg}
5487ec681f3Smrg
5497ec681f3SmrgLLVMValueRef ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
5507ec681f3Smrg                                            unsigned value_count, unsigned component)
5517ec681f3Smrg{
5527ec681f3Smrg   LLVMValueRef vec = NULL;
5537ec681f3Smrg
5547ec681f3Smrg   if (value_count == 1) {
5557ec681f3Smrg      return values[component];
5567ec681f3Smrg   } else if (!value_count)
5577ec681f3Smrg      unreachable("value_count is 0");
5587ec681f3Smrg
5597ec681f3Smrg   for (unsigned i = component; i < value_count + component; i++) {
5607ec681f3Smrg      LLVMValueRef value = values[i];
5617ec681f3Smrg
5627ec681f3Smrg      if (i == component)
5637ec681f3Smrg         vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count));
5647ec681f3Smrg      LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
5657ec681f3Smrg      vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
5667ec681f3Smrg   }
5677ec681f3Smrg   return vec;
5687ec681f3Smrg}
5697ec681f3Smrg
5707ec681f3SmrgLLVMValueRef ac_build_gather_values_extended(struct ac_llvm_context *ctx, LLVMValueRef *values,
5717ec681f3Smrg                                             unsigned value_count, unsigned value_stride, bool load,
5727ec681f3Smrg                                             bool always_vector)
5737ec681f3Smrg{
5747ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
5757ec681f3Smrg   LLVMValueRef vec = NULL;
5767ec681f3Smrg   unsigned i;
5777ec681f3Smrg
5787ec681f3Smrg   if (value_count == 1 && !always_vector) {
5797ec681f3Smrg      if (load)
5807ec681f3Smrg         return LLVMBuildLoad(builder, values[0], "");
5817ec681f3Smrg      return values[0];
5827ec681f3Smrg   } else if (!value_count)
5837ec681f3Smrg      unreachable("value_count is 0");
5847ec681f3Smrg
5857ec681f3Smrg   for (i = 0; i < value_count; i++) {
5867ec681f3Smrg      LLVMValueRef value = values[i * value_stride];
5877ec681f3Smrg      if (load)
5887ec681f3Smrg         value = LLVMBuildLoad(builder, value, "");
5897ec681f3Smrg
5907ec681f3Smrg      if (!i)
5917ec681f3Smrg         vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count));
5927ec681f3Smrg      LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
5937ec681f3Smrg      vec = LLVMBuildInsertElement(builder, vec, value, index, "");
5947ec681f3Smrg   }
5957ec681f3Smrg   return vec;
5967ec681f3Smrg}
5977ec681f3Smrg
5987ec681f3SmrgLLVMValueRef ac_build_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
5997ec681f3Smrg                                    unsigned value_count)
6007ec681f3Smrg{
6017ec681f3Smrg   return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
6027ec681f3Smrg}
6037ec681f3Smrg
6047ec681f3SmrgLLVMValueRef ac_build_concat(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
6057ec681f3Smrg{
6067ec681f3Smrg   unsigned a_size = ac_get_llvm_num_components(a);
6077ec681f3Smrg   unsigned b_size = ac_get_llvm_num_components(b);
6087ec681f3Smrg
6097ec681f3Smrg   LLVMValueRef *elems = alloca((a_size + b_size) * sizeof(LLVMValueRef));
6107ec681f3Smrg   for (unsigned i = 0; i < a_size; i++)
6117ec681f3Smrg      elems[i] = ac_llvm_extract_elem(ctx, a, i);
6127ec681f3Smrg   for (unsigned i = 0; i < b_size; i++)
6137ec681f3Smrg      elems[a_size + i] = ac_llvm_extract_elem(ctx, b, i);
6147ec681f3Smrg
6157ec681f3Smrg   return ac_build_gather_values(ctx, elems, a_size + b_size);
6167ec681f3Smrg}
6177ec681f3Smrg
6187ec681f3Smrg/* Expand a scalar or vector to <dst_channels x type> by filling the remaining
6197ec681f3Smrg * channels with undef. Extract at most src_channels components from the input.
6207ec681f3Smrg */
6217ec681f3SmrgLLVMValueRef ac_build_expand(struct ac_llvm_context *ctx, LLVMValueRef value,
6227ec681f3Smrg                             unsigned src_channels, unsigned dst_channels)
6237ec681f3Smrg{
6247ec681f3Smrg   LLVMTypeRef elemtype;
6257ec681f3Smrg   LLVMValueRef *const chan = alloca(dst_channels * sizeof(LLVMValueRef));
6267ec681f3Smrg
6277ec681f3Smrg   if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
6287ec681f3Smrg      unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
6297ec681f3Smrg
6307ec681f3Smrg      if (src_channels == dst_channels && vec_size == dst_channels)
6317ec681f3Smrg         return value;
6327ec681f3Smrg
6337ec681f3Smrg      src_channels = MIN2(src_channels, vec_size);
6347ec681f3Smrg
6357ec681f3Smrg      for (unsigned i = 0; i < src_channels; i++)
6367ec681f3Smrg         chan[i] = ac_llvm_extract_elem(ctx, value, i);
6377ec681f3Smrg
6387ec681f3Smrg      elemtype = LLVMGetElementType(LLVMTypeOf(value));
6397ec681f3Smrg   } else {
6407ec681f3Smrg      if (src_channels) {
6417ec681f3Smrg         assert(src_channels == 1);
6427ec681f3Smrg         chan[0] = value;
6437ec681f3Smrg      }
6447ec681f3Smrg      elemtype = LLVMTypeOf(value);
6457ec681f3Smrg   }
6467ec681f3Smrg
6477ec681f3Smrg   for (unsigned i = src_channels; i < dst_channels; i++)
6487ec681f3Smrg      chan[i] = LLVMGetUndef(elemtype);
6497ec681f3Smrg
6507ec681f3Smrg   return ac_build_gather_values(ctx, chan, dst_channels);
6517ec681f3Smrg}
6527ec681f3Smrg
6537ec681f3Smrg/* Extract components [start, start + channels) from a vector.
6547ec681f3Smrg */
6557ec681f3SmrgLLVMValueRef ac_extract_components(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned start,
6567ec681f3Smrg                                   unsigned channels)
6577ec681f3Smrg{
6587ec681f3Smrg   LLVMValueRef *const chan = alloca(channels * sizeof(LLVMValueRef));
6597ec681f3Smrg
6607ec681f3Smrg   for (unsigned i = 0; i < channels; i++)
6617ec681f3Smrg      chan[i] = ac_llvm_extract_elem(ctx, value, i + start);
6627ec681f3Smrg
6637ec681f3Smrg   return ac_build_gather_values(ctx, chan, channels);
6647ec681f3Smrg}
6657ec681f3Smrg
6667ec681f3Smrg/* Expand a scalar or vector to <4 x type> by filling the remaining channels
6677ec681f3Smrg * with undef. Extract at most num_channels components from the input.
6687ec681f3Smrg */
6697ec681f3SmrgLLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, LLVMValueRef value,
6707ec681f3Smrg                                     unsigned num_channels)
6717ec681f3Smrg{
6727ec681f3Smrg   return ac_build_expand(ctx, value, num_channels, 4);
6737ec681f3Smrg}
6747ec681f3Smrg
6757ec681f3SmrgLLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value)
6767ec681f3Smrg{
6777ec681f3Smrg   unsigned type_size = ac_get_type_size(LLVMTypeOf(value));
6787ec681f3Smrg   const char *name;
6797ec681f3Smrg
6807ec681f3Smrg   if (type_size == 2)
6817ec681f3Smrg      name = "llvm.rint.f16";
6827ec681f3Smrg   else if (type_size == 4)
6837ec681f3Smrg      name = "llvm.rint.f32";
6847ec681f3Smrg   else
6857ec681f3Smrg      name = "llvm.rint.f64";
6867ec681f3Smrg
6877ec681f3Smrg   return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1, AC_FUNC_ATTR_READNONE);
6887ec681f3Smrg}
6897ec681f3Smrg
6907ec681f3SmrgLLVMValueRef ac_build_fdiv(struct ac_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den)
6917ec681f3Smrg{
6927ec681f3Smrg   unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
6937ec681f3Smrg   const char *name;
6947ec681f3Smrg
6957ec681f3Smrg   /* For doubles, we need precise division to pass GLCTS. */
6967ec681f3Smrg   if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && type_size == 8)
6977ec681f3Smrg      return LLVMBuildFDiv(ctx->builder, num, den, "");
6987ec681f3Smrg
6997ec681f3Smrg   if (type_size == 2)
7007ec681f3Smrg      name = "llvm.amdgcn.rcp.f16";
7017ec681f3Smrg   else if (type_size == 4)
7027ec681f3Smrg      name = "llvm.amdgcn.rcp.f32";
7037ec681f3Smrg   else
7047ec681f3Smrg      name = "llvm.amdgcn.rcp.f64";
7057ec681f3Smrg
7067ec681f3Smrg   LLVMValueRef rcp =
7077ec681f3Smrg      ac_build_intrinsic(ctx, name, LLVMTypeOf(den), &den, 1, AC_FUNC_ATTR_READNONE);
7087ec681f3Smrg
7097ec681f3Smrg   return LLVMBuildFMul(ctx->builder, num, rcp, "");
7107ec681f3Smrg}
7117ec681f3Smrg
7127ec681f3Smrg/* See fast_idiv_by_const.h. */
7137ec681f3Smrg/* Set: increment = util_fast_udiv_info::increment ? multiplier : 0; */
7147ec681f3SmrgLLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, LLVMValueRef num,
7157ec681f3Smrg                                LLVMValueRef multiplier, LLVMValueRef pre_shift,
7167ec681f3Smrg                                LLVMValueRef post_shift, LLVMValueRef increment)
7177ec681f3Smrg{
7187ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
7197ec681f3Smrg
7207ec681f3Smrg   num = LLVMBuildLShr(builder, num, pre_shift, "");
7217ec681f3Smrg   num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""),
7227ec681f3Smrg                      LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
7237ec681f3Smrg   num = LLVMBuildAdd(builder, num, LLVMBuildZExt(builder, increment, ctx->i64, ""), "");
7247ec681f3Smrg   num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
7257ec681f3Smrg   num = LLVMBuildTrunc(builder, num, ctx->i32, "");
7267ec681f3Smrg   return LLVMBuildLShr(builder, num, post_shift, "");
7277ec681f3Smrg}
7287ec681f3Smrg
7297ec681f3Smrg/* See fast_idiv_by_const.h. */
7307ec681f3Smrg/* If num != UINT_MAX, this more efficient version can be used. */
7317ec681f3Smrg/* Set: increment = util_fast_udiv_info::increment; */
7327ec681f3SmrgLLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, LLVMValueRef num,
7337ec681f3Smrg                                    LLVMValueRef multiplier, LLVMValueRef pre_shift,
7347ec681f3Smrg                                    LLVMValueRef post_shift, LLVMValueRef increment)
7357ec681f3Smrg{
7367ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
7377ec681f3Smrg
7387ec681f3Smrg   num = LLVMBuildLShr(builder, num, pre_shift, "");
7397ec681f3Smrg   num = LLVMBuildNUWAdd(builder, num, increment, "");
7407ec681f3Smrg   num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""),
7417ec681f3Smrg                      LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
7427ec681f3Smrg   num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
7437ec681f3Smrg   num = LLVMBuildTrunc(builder, num, ctx->i32, "");
7447ec681f3Smrg   return LLVMBuildLShr(builder, num, post_shift, "");
7457ec681f3Smrg}
7467ec681f3Smrg
7477ec681f3Smrg/* See fast_idiv_by_const.h. */
7487ec681f3Smrg/* Both operands must fit in 31 bits and the divisor must not be 1. */
7497ec681f3SmrgLLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMValueRef num,
7507ec681f3Smrg                                              LLVMValueRef multiplier, LLVMValueRef post_shift)
7517ec681f3Smrg{
7527ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
7537ec681f3Smrg
7547ec681f3Smrg   num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""),
7557ec681f3Smrg                      LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
7567ec681f3Smrg   num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
7577ec681f3Smrg   num = LLVMBuildTrunc(builder, num, ctx->i32, "");
7587ec681f3Smrg   return LLVMBuildLShr(builder, num, post_shift, "");
7597ec681f3Smrg}
7607ec681f3Smrg
7617ec681f3Smrg/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
7627ec681f3Smrg * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
7637ec681f3Smrg * already multiplied by two. id is the cube face number.
7647ec681f3Smrg */
7657ec681f3Smrgstruct cube_selection_coords {
7667ec681f3Smrg   LLVMValueRef stc[2];
7677ec681f3Smrg   LLVMValueRef ma;
7687ec681f3Smrg   LLVMValueRef id;
7697ec681f3Smrg};
7707ec681f3Smrg
7717ec681f3Smrgstatic void build_cube_intrinsic(struct ac_llvm_context *ctx, LLVMValueRef in[3],
7727ec681f3Smrg                                 struct cube_selection_coords *out)
7737ec681f3Smrg{
7747ec681f3Smrg   LLVMTypeRef f32 = ctx->f32;
7757ec681f3Smrg
7767ec681f3Smrg   out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", f32, in, 3, AC_FUNC_ATTR_READNONE);
7777ec681f3Smrg   out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", f32, in, 3, AC_FUNC_ATTR_READNONE);
7787ec681f3Smrg   out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", f32, in, 3, AC_FUNC_ATTR_READNONE);
7797ec681f3Smrg   out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", f32, in, 3, AC_FUNC_ATTR_READNONE);
7807ec681f3Smrg}
7817ec681f3Smrg
7827ec681f3Smrg/**
7837ec681f3Smrg * Build a manual selection sequence for cube face sc/tc coordinates and
7847ec681f3Smrg * major axis vector (multiplied by 2 for consistency) for the given
7857ec681f3Smrg * vec3 \p coords, for the face implied by \p selcoords.
7867ec681f3Smrg *
7877ec681f3Smrg * For the major axis, we always adjust the sign to be in the direction of
7887ec681f3Smrg * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
7897ec681f3Smrg * the selcoords major axis.
7907ec681f3Smrg */
7917ec681f3Smrgstatic void build_cube_select(struct ac_llvm_context *ctx,
7927ec681f3Smrg                              const struct cube_selection_coords *selcoords,
7937ec681f3Smrg                              const LLVMValueRef *coords, LLVMValueRef *out_st,
7947ec681f3Smrg                              LLVMValueRef *out_ma)
7957ec681f3Smrg{
7967ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
7977ec681f3Smrg   LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
7987ec681f3Smrg   LLVMValueRef is_ma_positive;
7997ec681f3Smrg   LLVMValueRef sgn_ma;
8007ec681f3Smrg   LLVMValueRef is_ma_z, is_not_ma_z;
8017ec681f3Smrg   LLVMValueRef is_ma_y;
8027ec681f3Smrg   LLVMValueRef is_ma_x;
8037ec681f3Smrg   LLVMValueRef sgn;
8047ec681f3Smrg   LLVMValueRef tmp;
8057ec681f3Smrg
8067ec681f3Smrg   is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->ma, LLVMConstReal(f32, 0.0), "");
8077ec681f3Smrg   sgn_ma = LLVMBuildSelect(builder, is_ma_positive, LLVMConstReal(f32, 1.0),
8087ec681f3Smrg                            LLVMConstReal(f32, -1.0), "");
8097ec681f3Smrg
8107ec681f3Smrg   is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
8117ec681f3Smrg   is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
8127ec681f3Smrg   is_ma_y = LLVMBuildAnd(
8137ec681f3Smrg      builder, is_not_ma_z,
8147ec681f3Smrg      LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
8157ec681f3Smrg   is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
8167ec681f3Smrg
8177ec681f3Smrg   /* Select sc */
8187ec681f3Smrg   tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
8197ec681f3Smrg   sgn = LLVMBuildSelect(
8207ec681f3Smrg      builder, is_ma_y, LLVMConstReal(f32, 1.0),
8217ec681f3Smrg      LLVMBuildSelect(builder, is_ma_z, sgn_ma, LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
8227ec681f3Smrg   out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
8237ec681f3Smrg
8247ec681f3Smrg   /* Select tc */
8257ec681f3Smrg   tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
8267ec681f3Smrg   sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma, LLVMConstReal(f32, -1.0), "");
8277ec681f3Smrg   out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
8287ec681f3Smrg
8297ec681f3Smrg   /* Select ma */
8307ec681f3Smrg   tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
8317ec681f3Smrg                         LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
8327ec681f3Smrg   tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
8337ec681f3Smrg   *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
8347ec681f3Smrg}
8357ec681f3Smrg
8367ec681f3Smrgvoid ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod,
8377ec681f3Smrg                            LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg)
8387ec681f3Smrg{
8397ec681f3Smrg
8407ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
8417ec681f3Smrg   struct cube_selection_coords selcoords;
8427ec681f3Smrg   LLVMValueRef coords[3];
8437ec681f3Smrg   LLVMValueRef invma;
8447ec681f3Smrg
8457ec681f3Smrg   if (is_array && !is_lod) {
8467ec681f3Smrg      LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);
8477ec681f3Smrg
8487ec681f3Smrg      /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
8497ec681f3Smrg       *
8507ec681f3Smrg       *    "For Array forms, the array layer used will be
8517ec681f3Smrg       *
8527ec681f3Smrg       *       max(0, min(d−1, floor(layer+0.5)))
8537ec681f3Smrg       *
8547ec681f3Smrg       *     where d is the depth of the texture array and layer
8557ec681f3Smrg       *     comes from the component indicated in the tables below.
8567ec681f3Smrg       *     Workaroudn for an issue where the layer is taken from a
8577ec681f3Smrg       *     helper invocation which happens to fall on a different
8587ec681f3Smrg       *     layer due to extrapolation."
8597ec681f3Smrg       *
8607ec681f3Smrg       * GFX8 and earlier attempt to implement this in hardware by
8617ec681f3Smrg       * clamping the value of coords[2] = (8 * layer) + face.
8627ec681f3Smrg       * Unfortunately, this means that the we end up with the wrong
8637ec681f3Smrg       * face when clamping occurs.
8647ec681f3Smrg       *
8657ec681f3Smrg       * Clamp the layer earlier to work around the issue.
8667ec681f3Smrg       */
8677ec681f3Smrg      if (ctx->chip_class <= GFX8) {
8687ec681f3Smrg         LLVMValueRef ge0;
8697ec681f3Smrg         ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
8707ec681f3Smrg         tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
8717ec681f3Smrg      }
8727ec681f3Smrg
8737ec681f3Smrg      coords_arg[3] = tmp;
8747ec681f3Smrg   }
8757ec681f3Smrg
8767ec681f3Smrg   build_cube_intrinsic(ctx, coords_arg, &selcoords);
8777ec681f3Smrg
8787ec681f3Smrg   invma =
8797ec681f3Smrg      ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
8807ec681f3Smrg   invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
8817ec681f3Smrg
8827ec681f3Smrg   for (int i = 0; i < 2; ++i)
8837ec681f3Smrg      coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
8847ec681f3Smrg
8857ec681f3Smrg   coords[2] = selcoords.id;
8867ec681f3Smrg
8877ec681f3Smrg   if (is_deriv && derivs_arg) {
8887ec681f3Smrg      LLVMValueRef derivs[4];
8897ec681f3Smrg      int axis;
8907ec681f3Smrg
8917ec681f3Smrg      /* Convert cube derivatives to 2D derivatives. */
8927ec681f3Smrg      for (axis = 0; axis < 2; axis++) {
8937ec681f3Smrg         LLVMValueRef deriv_st[2];
8947ec681f3Smrg         LLVMValueRef deriv_ma;
8957ec681f3Smrg
8967ec681f3Smrg         /* Transform the derivative alongside the texture
8977ec681f3Smrg          * coordinate. Mathematically, the correct formula is
8987ec681f3Smrg          * as follows. Assume we're projecting onto the +Z face
8997ec681f3Smrg          * and denote by dx/dh the derivative of the (original)
9007ec681f3Smrg          * X texture coordinate with respect to horizontal
9017ec681f3Smrg          * window coordinates. The projection onto the +Z face
9027ec681f3Smrg          * plane is:
9037ec681f3Smrg          *
9047ec681f3Smrg          *   f(x,z) = x/z
9057ec681f3Smrg          *
9067ec681f3Smrg          * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
9077ec681f3Smrg          *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
9087ec681f3Smrg          *
9097ec681f3Smrg          * This motivatives the implementation below.
9107ec681f3Smrg          *
9117ec681f3Smrg          * Whether this actually gives the expected results for
9127ec681f3Smrg          * apps that might feed in derivatives obtained via
9137ec681f3Smrg          * finite differences is anyone's guess. The OpenGL spec
9147ec681f3Smrg          * seems awfully quiet about how textureGrad for cube
9157ec681f3Smrg          * maps should be handled.
9167ec681f3Smrg          */
9177ec681f3Smrg         build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3], deriv_st, &deriv_ma);
9187ec681f3Smrg
9197ec681f3Smrg         deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
9207ec681f3Smrg
9217ec681f3Smrg         for (int i = 0; i < 2; ++i)
9227ec681f3Smrg            derivs[axis * 2 + i] =
9237ec681f3Smrg               LLVMBuildFSub(builder, LLVMBuildFMul(builder, deriv_st[i], invma, ""),
9247ec681f3Smrg                             LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
9257ec681f3Smrg      }
9267ec681f3Smrg
9277ec681f3Smrg      memcpy(derivs_arg, derivs, sizeof(derivs));
9287ec681f3Smrg   }
9297ec681f3Smrg
9307ec681f3Smrg   /* Shift the texture coordinate. This must be applied after the
9317ec681f3Smrg    * derivative calculation.
9327ec681f3Smrg    */
9337ec681f3Smrg   for (int i = 0; i < 2; ++i)
9347ec681f3Smrg      coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
9357ec681f3Smrg
9367ec681f3Smrg   if (is_array) {
9377ec681f3Smrg      /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
9387ec681f3Smrg      /* coords_arg.w component - array_index for cube arrays */
9397ec681f3Smrg      coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]);
9407ec681f3Smrg   }
9417ec681f3Smrg
9427ec681f3Smrg   memcpy(coords_arg, coords, sizeof(coords));
9437ec681f3Smrg}
9447ec681f3Smrg
9457ec681f3SmrgLLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
9467ec681f3Smrg                                LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i,
9477ec681f3Smrg                                LLVMValueRef j)
9487ec681f3Smrg{
9497ec681f3Smrg   LLVMValueRef args[5];
9507ec681f3Smrg   LLVMValueRef p1;
9517ec681f3Smrg
9527ec681f3Smrg   args[0] = i;
9537ec681f3Smrg   args[1] = llvm_chan;
9547ec681f3Smrg   args[2] = attr_number;
9557ec681f3Smrg   args[3] = params;
9567ec681f3Smrg
9577ec681f3Smrg   p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1", ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
9587ec681f3Smrg
9597ec681f3Smrg   args[0] = p1;
9607ec681f3Smrg   args[1] = j;
9617ec681f3Smrg   args[2] = llvm_chan;
9627ec681f3Smrg   args[3] = attr_number;
9637ec681f3Smrg   args[4] = params;
9647ec681f3Smrg
9657ec681f3Smrg   return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2", ctx->f32, args, 5,
9667ec681f3Smrg                             AC_FUNC_ATTR_READNONE);
9677ec681f3Smrg}
9687ec681f3Smrg
9697ec681f3SmrgLLVMValueRef ac_build_fs_interp_f16(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
9707ec681f3Smrg                                    LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i,
9717ec681f3Smrg                                    LLVMValueRef j, bool high_16bits)
9727ec681f3Smrg{
9737ec681f3Smrg   LLVMValueRef args[6];
9747ec681f3Smrg   LLVMValueRef p1;
9757ec681f3Smrg
9767ec681f3Smrg   args[0] = i;
9777ec681f3Smrg   args[1] = llvm_chan;
9787ec681f3Smrg   args[2] = attr_number;
9797ec681f3Smrg   args[3] = high_16bits ? ctx->i1true : ctx->i1false;
9807ec681f3Smrg   args[4] = params;
9817ec681f3Smrg
9827ec681f3Smrg   p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", ctx->f32, args, 5,
9837ec681f3Smrg                           AC_FUNC_ATTR_READNONE);
9847ec681f3Smrg
9857ec681f3Smrg   args[0] = p1;
9867ec681f3Smrg   args[1] = j;
9877ec681f3Smrg   args[2] = llvm_chan;
9887ec681f3Smrg   args[3] = attr_number;
9897ec681f3Smrg   args[4] = high_16bits ? ctx->i1true : ctx->i1false;
9907ec681f3Smrg   args[5] = params;
9917ec681f3Smrg
9927ec681f3Smrg   return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", ctx->f16, args, 6,
9937ec681f3Smrg                             AC_FUNC_ATTR_READNONE);
9947ec681f3Smrg}
9957ec681f3Smrg
9967ec681f3SmrgLLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter,
9977ec681f3Smrg                                    LLVMValueRef llvm_chan, LLVMValueRef attr_number,
9987ec681f3Smrg                                    LLVMValueRef params)
9997ec681f3Smrg{
10007ec681f3Smrg   LLVMValueRef args[4];
10017ec681f3Smrg
10027ec681f3Smrg   args[0] = parameter;
10037ec681f3Smrg   args[1] = llvm_chan;
10047ec681f3Smrg   args[2] = attr_number;
10057ec681f3Smrg   args[3] = params;
10067ec681f3Smrg
10077ec681f3Smrg   return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov", ctx->f32, args, 4,
10087ec681f3Smrg                             AC_FUNC_ATTR_READNONE);
10097ec681f3Smrg}
10107ec681f3Smrg
10117ec681f3SmrgLLVMValueRef ac_build_gep_ptr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
10127ec681f3Smrg                              LLVMValueRef index)
10137ec681f3Smrg{
10147ec681f3Smrg   return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
10157ec681f3Smrg}
10167ec681f3Smrg
10177ec681f3SmrgLLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index)
10187ec681f3Smrg{
10197ec681f3Smrg   LLVMValueRef indices[2] = {
10207ec681f3Smrg      ctx->i32_0,
10217ec681f3Smrg      index,
10227ec681f3Smrg   };
10237ec681f3Smrg   return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
10247ec681f3Smrg}
10257ec681f3Smrg
10267ec681f3SmrgLLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMValueRef index)
10277ec681f3Smrg{
10287ec681f3Smrg   return LLVMBuildPointerCast(ctx->builder, LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""),
10297ec681f3Smrg                               LLVMTypeOf(ptr), "");
10307ec681f3Smrg}
10317ec681f3Smrg
10327ec681f3Smrgvoid ac_build_indexed_store(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index,
10337ec681f3Smrg                            LLVMValueRef value)
10347ec681f3Smrg{
10357ec681f3Smrg   LLVMBuildStore(ctx->builder, value, ac_build_gep0(ctx, base_ptr, index));
10367ec681f3Smrg}
10377ec681f3Smrg
10387ec681f3Smrg/**
10397ec681f3Smrg * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
10407ec681f3Smrg * It's equivalent to doing a load from &base_ptr[index].
10417ec681f3Smrg *
10427ec681f3Smrg * \param base_ptr  Where the array starts.
10437ec681f3Smrg * \param index     The element index into the array.
10447ec681f3Smrg * \param uniform   Whether the base_ptr and index can be assumed to be
10457ec681f3Smrg *                  dynamically uniform (i.e. load to an SGPR)
10467ec681f3Smrg * \param invariant Whether the load is invariant (no other opcodes affect it)
10477ec681f3Smrg * \param no_unsigned_wraparound
10487ec681f3Smrg *    For all possible re-associations and re-distributions of an expression
10497ec681f3Smrg *    "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
10507ec681f3Smrg *    without inbounds in base_ptr), this parameter is true if "addr + offset"
10517ec681f3Smrg *    does not result in an unsigned integer wraparound. This is used for
10527ec681f3Smrg *    optimal code generation of 32-bit pointer arithmetic.
10537ec681f3Smrg *
10547ec681f3Smrg *    For example, a 32-bit immediate offset that causes a 32-bit unsigned
10557ec681f3Smrg *    integer wraparound can't be an imm offset in s_load_dword, because
10567ec681f3Smrg *    the instruction performs "addr + offset" in 64 bits.
10577ec681f3Smrg *
10587ec681f3Smrg *    Expected usage for bindless textures by chaining GEPs:
10597ec681f3Smrg *      // possible unsigned wraparound, don't use InBounds:
10607ec681f3Smrg *      ptr1 = LLVMBuildGEP(base_ptr, index);
10617ec681f3Smrg *      image = load(ptr1); // becomes "s_load ptr1, 0"
10627ec681f3Smrg *
10637ec681f3Smrg *      ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
10647ec681f3Smrg *      sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
10657ec681f3Smrg */
10667ec681f3Smrgstatic LLVMValueRef ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
10677ec681f3Smrg                                         LLVMValueRef index, bool uniform, bool invariant,
10687ec681f3Smrg                                         bool no_unsigned_wraparound)
10697ec681f3Smrg{
10707ec681f3Smrg   LLVMValueRef pointer, result;
10717ec681f3Smrg
10727ec681f3Smrg   if (no_unsigned_wraparound &&
10737ec681f3Smrg       LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT)
10747ec681f3Smrg      pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, &index, 1, "");
10757ec681f3Smrg   else
10767ec681f3Smrg      pointer = LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
10777ec681f3Smrg
10787ec681f3Smrg   if (uniform)
10797ec681f3Smrg      LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
10807ec681f3Smrg   result = LLVMBuildLoad(ctx->builder, pointer, "");
10817ec681f3Smrg   if (invariant)
10827ec681f3Smrg      LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
10837ec681f3Smrg   LLVMSetAlignment(result, 4);
10847ec681f3Smrg   return result;
10857ec681f3Smrg}
10867ec681f3Smrg
10877ec681f3SmrgLLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index)
10887ec681f3Smrg{
10897ec681f3Smrg   return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
10907ec681f3Smrg}
10917ec681f3Smrg
10927ec681f3SmrgLLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
10937ec681f3Smrg                                     LLVMValueRef index)
10947ec681f3Smrg{
10957ec681f3Smrg   return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
10967ec681f3Smrg}
10977ec681f3Smrg
10987ec681f3Smrg/* This assumes that there is no unsigned integer wraparound during the address
10997ec681f3Smrg * computation, excluding all GEPs within base_ptr. */
11007ec681f3SmrgLLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
11017ec681f3Smrg                                   LLVMValueRef index)
11027ec681f3Smrg{
11037ec681f3Smrg   return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
11047ec681f3Smrg}
11057ec681f3Smrg
11067ec681f3Smrg/* See ac_build_load_custom() documentation. */
11077ec681f3SmrgLLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
11087ec681f3Smrg                                                   LLVMValueRef base_ptr, LLVMValueRef index)
11097ec681f3Smrg{
11107ec681f3Smrg   return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
11117ec681f3Smrg}
11127ec681f3Smrg
11137ec681f3Smrgstatic unsigned get_load_cache_policy(struct ac_llvm_context *ctx, unsigned cache_policy)
11147ec681f3Smrg{
11157ec681f3Smrg   return cache_policy | (ctx->chip_class >= GFX10 && cache_policy & ac_glc ? ac_dlc : 0);
11167ec681f3Smrg}
11177ec681f3Smrg
11187ec681f3Smrgstatic void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
11197ec681f3Smrg                                         LLVMValueRef data, LLVMValueRef vindex,
11207ec681f3Smrg                                         LLVMValueRef voffset, LLVMValueRef soffset,
11217ec681f3Smrg                                         unsigned cache_policy, bool use_format, bool structurized)
11227ec681f3Smrg{
11237ec681f3Smrg   LLVMValueRef args[6];
11247ec681f3Smrg   int idx = 0;
11257ec681f3Smrg   args[idx++] = data;
11267ec681f3Smrg   args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
11277ec681f3Smrg   if (structurized)
11287ec681f3Smrg      args[idx++] = vindex ? vindex : ctx->i32_0;
11297ec681f3Smrg   args[idx++] = voffset ? voffset : ctx->i32_0;
11307ec681f3Smrg   args[idx++] = soffset ? soffset : ctx->i32_0;
11317ec681f3Smrg   args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
11327ec681f3Smrg   const char *indexing_kind = structurized ? "struct" : "raw";
11337ec681f3Smrg   char name[256], type_name[8];
11347ec681f3Smrg
11357ec681f3Smrg   ac_build_type_name_for_intr(LLVMTypeOf(data), type_name, sizeof(type_name));
11367ec681f3Smrg
11377ec681f3Smrg   if (use_format) {
11387ec681f3Smrg      snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s", indexing_kind,
11397ec681f3Smrg               type_name);
11407ec681f3Smrg   } else {
11417ec681f3Smrg      snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s", indexing_kind, type_name);
11427ec681f3Smrg   }
11437ec681f3Smrg
11447ec681f3Smrg   ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
11457ec681f3Smrg}
11467ec681f3Smrg
11477ec681f3Smrgvoid ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
11487ec681f3Smrg                                  LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy)
11497ec681f3Smrg{
11507ec681f3Smrg   ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, cache_policy, true, true);
11517ec681f3Smrg}
11527ec681f3Smrg
11537ec681f3Smrg/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
11547ec681f3Smrg * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
11557ec681f3Smrg * or v4i32 (num_channels=3,4).
11567ec681f3Smrg */
11577ec681f3Smrgvoid ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
11587ec681f3Smrg                                 unsigned num_channels, LLVMValueRef voffset, LLVMValueRef soffset,
11597ec681f3Smrg                                 unsigned inst_offset, unsigned cache_policy)
11607ec681f3Smrg{
11617ec681f3Smrg   /* Split 3 channel stores. */
11627ec681f3Smrg   if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) {
11637ec681f3Smrg      LLVMValueRef v[3], v01;
11647ec681f3Smrg
11657ec681f3Smrg      for (int i = 0; i < 3; i++) {
11667ec681f3Smrg         v[i] = LLVMBuildExtractElement(ctx->builder, vdata, LLVMConstInt(ctx->i32, i, 0), "");
11677ec681f3Smrg      }
11687ec681f3Smrg      v01 = ac_build_gather_values(ctx, v, 2);
11697ec681f3Smrg
11707ec681f3Smrg      ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset, soffset, inst_offset, cache_policy);
11717ec681f3Smrg      ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset, soffset, inst_offset + 8,
11727ec681f3Smrg                                  cache_policy);
11737ec681f3Smrg      return;
11747ec681f3Smrg   }
11757ec681f3Smrg
11767ec681f3Smrg   /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
11777ec681f3Smrg    * (voffset is swizzled, but soffset isn't swizzled).
11787ec681f3Smrg    * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
11797ec681f3Smrg    */
11807ec681f3Smrg   if (!(cache_policy & ac_swizzled)) {
11817ec681f3Smrg      LLVMValueRef offset = soffset;
11827ec681f3Smrg
11837ec681f3Smrg      if (inst_offset)
11847ec681f3Smrg         offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, inst_offset, 0), "");
11857ec681f3Smrg
11867ec681f3Smrg      ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), ctx->i32_0, voffset, offset,
11877ec681f3Smrg                                   cache_policy, false, false);
11887ec681f3Smrg      return;
11897ec681f3Smrg   }
11907ec681f3Smrg
11917ec681f3Smrg   static const unsigned dfmts[] = {V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32,
11927ec681f3Smrg                                    V_008F0C_BUF_DATA_FORMAT_32_32_32,
11937ec681f3Smrg                                    V_008F0C_BUF_DATA_FORMAT_32_32_32_32};
11947ec681f3Smrg   unsigned dfmt = dfmts[num_channels - 1];
11957ec681f3Smrg   unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
11967ec681f3Smrg   LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
11977ec681f3Smrg
11987ec681f3Smrg   ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset, immoffset, num_channels, dfmt,
11997ec681f3Smrg                              nfmt, cache_policy);
12007ec681f3Smrg}
12017ec681f3Smrg
12027ec681f3Smrgstatic LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
12037ec681f3Smrg                                                LLVMValueRef vindex, LLVMValueRef voffset,
12047ec681f3Smrg                                                LLVMValueRef soffset, unsigned num_channels,
12057ec681f3Smrg                                                LLVMTypeRef channel_type, unsigned cache_policy,
12067ec681f3Smrg                                                bool can_speculate, bool use_format,
12077ec681f3Smrg                                                bool structurized)
12087ec681f3Smrg{
12097ec681f3Smrg   LLVMValueRef args[5];
12107ec681f3Smrg   int idx = 0;
12117ec681f3Smrg   args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
12127ec681f3Smrg   if (structurized)
12137ec681f3Smrg      args[idx++] = vindex ? vindex : ctx->i32_0;
12147ec681f3Smrg   args[idx++] = voffset ? voffset : ctx->i32_0;
12157ec681f3Smrg   args[idx++] = soffset ? soffset : ctx->i32_0;
12167ec681f3Smrg   args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
12177ec681f3Smrg   unsigned func =
12187ec681f3Smrg      !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
12197ec681f3Smrg   const char *indexing_kind = structurized ? "struct" : "raw";
12207ec681f3Smrg   char name[256], type_name[8];
12217ec681f3Smrg
12227ec681f3Smrg   /* D16 is only supported on gfx8+ */
12237ec681f3Smrg   assert(!use_format || (channel_type != ctx->f16 && channel_type != ctx->i16) ||
12247ec681f3Smrg          ctx->chip_class >= GFX8);
12257ec681f3Smrg
12267ec681f3Smrg   LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
12277ec681f3Smrg   ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
12287ec681f3Smrg
12297ec681f3Smrg   if (use_format) {
12307ec681f3Smrg      snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s", indexing_kind,
12317ec681f3Smrg               type_name);
12327ec681f3Smrg   } else {
12337ec681f3Smrg      snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s", indexing_kind, type_name);
12347ec681f3Smrg   }
12357ec681f3Smrg
12367ec681f3Smrg   return ac_build_intrinsic(ctx, name, type, args, idx, ac_get_load_intr_attribs(can_speculate));
12377ec681f3Smrg}
12387ec681f3Smrg
12397ec681f3SmrgLLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels,
12407ec681f3Smrg                                  LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset,
12417ec681f3Smrg                                  unsigned inst_offset, LLVMTypeRef channel_type,
12427ec681f3Smrg                                  unsigned cache_policy, bool can_speculate, bool allow_smem)
12437ec681f3Smrg{
12447ec681f3Smrg   LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
12457ec681f3Smrg   if (voffset)
12467ec681f3Smrg      offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
12477ec681f3Smrg   if (soffset)
12487ec681f3Smrg      offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
12497ec681f3Smrg
12507ec681f3Smrg   if (allow_smem && !(cache_policy & ac_slc) &&
12517ec681f3Smrg       (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) {
12527ec681f3Smrg      assert(vindex == NULL);
12537ec681f3Smrg
12547ec681f3Smrg      LLVMValueRef result[8];
12557ec681f3Smrg
12567ec681f3Smrg      for (int i = 0; i < num_channels; i++) {
12577ec681f3Smrg         if (i) {
12587ec681f3Smrg            offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, 4, 0), "");
12597ec681f3Smrg         }
12607ec681f3Smrg         LLVMValueRef args[3] = {
12617ec681f3Smrg            rsrc,
12627ec681f3Smrg            offset,
12637ec681f3Smrg            LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0),
12647ec681f3Smrg         };
12657ec681f3Smrg         result[i] = ac_build_intrinsic(ctx, "llvm.amdgcn.s.buffer.load.f32", ctx->f32, args, 3,
12667ec681f3Smrg                                        AC_FUNC_ATTR_READNONE);
12677ec681f3Smrg      }
12687ec681f3Smrg      if (num_channels == 1)
12697ec681f3Smrg         return result[0];
12707ec681f3Smrg
12717ec681f3Smrg      if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false))
12727ec681f3Smrg         result[num_channels++] = LLVMGetUndef(ctx->f32);
12737ec681f3Smrg      return ac_build_gather_values(ctx, result, num_channels);
12747ec681f3Smrg   }
12757ec681f3Smrg
12767ec681f3Smrg   return ac_build_buffer_load_common(ctx, rsrc, vindex, offset, ctx->i32_0, num_channels,
12777ec681f3Smrg                                      channel_type, cache_policy, can_speculate, false, false);
12787ec681f3Smrg}
12797ec681f3Smrg
12807ec681f3SmrgLLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
12817ec681f3Smrg                                         LLVMValueRef vindex, LLVMValueRef voffset,
12827ec681f3Smrg                                         unsigned num_channels, unsigned cache_policy,
12837ec681f3Smrg                                         bool can_speculate, bool d16, bool tfe)
12847ec681f3Smrg{
12857ec681f3Smrg   if (tfe) {
12867ec681f3Smrg      assert(!d16);
12877ec681f3Smrg
12887ec681f3Smrg      char code[256];
12897ec681f3Smrg      /* The definition in the assembly and the one in the constraint string
12907ec681f3Smrg       * differs because of an assembler bug.
12917ec681f3Smrg       */
12927ec681f3Smrg      snprintf(code, sizeof(code),
12937ec681f3Smrg               "v_mov_b32 v0, 0\n"
12947ec681f3Smrg               "v_mov_b32 v1, 0\n"
12957ec681f3Smrg               "v_mov_b32 v2, 0\n"
12967ec681f3Smrg               "v_mov_b32 v3, 0\n"
12977ec681f3Smrg               "v_mov_b32 v4, 0\n"
12987ec681f3Smrg               "buffer_load_format_xyzw v[0:3], $1, $2, 0, idxen offen %s %s tfe %s\n"
12997ec681f3Smrg               "s_waitcnt vmcnt(0)",
13007ec681f3Smrg               cache_policy & ac_glc ? "glc" : "",
13017ec681f3Smrg               cache_policy & ac_slc ? "slc" : "",
13027ec681f3Smrg               cache_policy & ac_dlc ? "dlc" : "");
13037ec681f3Smrg
13047ec681f3Smrg      LLVMTypeRef param_types[] = {ctx->v2i32, ctx->v4i32};
13057ec681f3Smrg      LLVMTypeRef calltype = LLVMFunctionType(LLVMVectorType(ctx->f32, 5), param_types, 2, false);
13067ec681f3Smrg      LLVMValueRef inlineasm = LLVMConstInlineAsm(calltype, code, "=&{v[0:4]},v,s", false, false);
13077ec681f3Smrg
13087ec681f3Smrg      LLVMValueRef addr_comp[2] = {vindex ? vindex : ctx->i32_0,
13097ec681f3Smrg                                   voffset ? voffset : ctx->i32_0};
13107ec681f3Smrg
13117ec681f3Smrg      LLVMValueRef args[] = {ac_build_gather_values(ctx, addr_comp, 2),
13127ec681f3Smrg                             LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "")};
13137ec681f3Smrg      LLVMValueRef res = LLVMBuildCall(ctx->builder, inlineasm, args, 2, "");
13147ec681f3Smrg
13157ec681f3Smrg      return ac_build_concat(ctx, ac_trim_vector(ctx, res, num_channels),
13167ec681f3Smrg                             ac_llvm_extract_elem(ctx, res, 4));
13177ec681f3Smrg   }
13187ec681f3Smrg
13197ec681f3Smrg   return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, num_channels,
13207ec681f3Smrg                                      d16 ? ctx->f16 : ctx->f32, cache_policy, can_speculate, true,
13217ec681f3Smrg                                      true);
13227ec681f3Smrg}
13237ec681f3Smrg
13247ec681f3Smrgstatic LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
13257ec681f3Smrg                                          LLVMValueRef vindex, LLVMValueRef voffset,
13267ec681f3Smrg                                          LLVMValueRef soffset, LLVMValueRef immoffset,
13277ec681f3Smrg                                          unsigned num_channels, unsigned dfmt, unsigned nfmt,
13287ec681f3Smrg                                          unsigned cache_policy, bool can_speculate,
13297ec681f3Smrg                                          bool structurized)
13307ec681f3Smrg{
13317ec681f3Smrg   voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
13327ec681f3Smrg
13337ec681f3Smrg   LLVMValueRef args[6];
13347ec681f3Smrg   int idx = 0;
13357ec681f3Smrg   args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
13367ec681f3Smrg   if (structurized)
13377ec681f3Smrg      args[idx++] = vindex ? vindex : ctx->i32_0;
13387ec681f3Smrg   args[idx++] = voffset ? voffset : ctx->i32_0;
13397ec681f3Smrg   args[idx++] = soffset ? soffset : ctx->i32_0;
13407ec681f3Smrg   args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
13417ec681f3Smrg   args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
13427ec681f3Smrg   unsigned func =
13437ec681f3Smrg      !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
13447ec681f3Smrg   const char *indexing_kind = structurized ? "struct" : "raw";
13457ec681f3Smrg   char name[256], type_name[8];
13467ec681f3Smrg
13477ec681f3Smrg   LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
13487ec681f3Smrg   ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
13497ec681f3Smrg
13507ec681f3Smrg   snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s", indexing_kind, type_name);
13517ec681f3Smrg
13527ec681f3Smrg   return ac_build_intrinsic(ctx, name, type, args, idx, ac_get_load_intr_attribs(can_speculate));
13537ec681f3Smrg}
13547ec681f3Smrg
13557ec681f3SmrgLLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
13567ec681f3Smrg                                          LLVMValueRef vindex, LLVMValueRef voffset,
13577ec681f3Smrg                                          LLVMValueRef soffset, LLVMValueRef immoffset,
13587ec681f3Smrg                                          unsigned num_channels, unsigned dfmt, unsigned nfmt,
13597ec681f3Smrg                                          unsigned cache_policy, bool can_speculate)
13607ec681f3Smrg{
13617ec681f3Smrg   return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset, immoffset, num_channels, dfmt,
13627ec681f3Smrg                                nfmt, cache_policy, can_speculate, true);
13637ec681f3Smrg}
13647ec681f3Smrg
13657ec681f3SmrgLLVMValueRef ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
13667ec681f3Smrg                                         LLVMValueRef voffset, LLVMValueRef soffset,
13677ec681f3Smrg                                         LLVMValueRef immoffset, unsigned cache_policy)
13687ec681f3Smrg{
13697ec681f3Smrg   voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
13707ec681f3Smrg
13717ec681f3Smrg   return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i16,
13727ec681f3Smrg                                      cache_policy, false, false, false);
13737ec681f3Smrg}
13747ec681f3Smrg
13757ec681f3SmrgLLVMValueRef ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
13767ec681f3Smrg                                        LLVMValueRef voffset, LLVMValueRef soffset,
13777ec681f3Smrg                                        LLVMValueRef immoffset, unsigned cache_policy)
13787ec681f3Smrg{
13797ec681f3Smrg   voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
13807ec681f3Smrg
13817ec681f3Smrg   return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i8, cache_policy,
13827ec681f3Smrg                                      false, false, false);
13837ec681f3Smrg}
13847ec681f3Smrg
13857ec681f3Smrg/**
13867ec681f3Smrg * Convert an 11- or 10-bit unsigned floating point number to an f32.
13877ec681f3Smrg *
13887ec681f3Smrg * The input exponent is expected to be biased analogous to IEEE-754, i.e. by
13897ec681f3Smrg * 2^(exp_bits-1) - 1 (as defined in OpenGL and other graphics APIs).
13907ec681f3Smrg */
13917ec681f3Smrgstatic LLVMValueRef ac_ufN_to_float(struct ac_llvm_context *ctx, LLVMValueRef src,
13927ec681f3Smrg                                    unsigned exp_bits, unsigned mant_bits)
13937ec681f3Smrg{
13947ec681f3Smrg   assert(LLVMTypeOf(src) == ctx->i32);
13957ec681f3Smrg
13967ec681f3Smrg   LLVMValueRef tmp;
13977ec681f3Smrg   LLVMValueRef mantissa;
13987ec681f3Smrg   mantissa =
13997ec681f3Smrg      LLVMBuildAnd(ctx->builder, src, LLVMConstInt(ctx->i32, (1 << mant_bits) - 1, false), "");
14007ec681f3Smrg
14017ec681f3Smrg   /* Converting normal numbers is just a shift + correcting the exponent bias */
14027ec681f3Smrg   unsigned normal_shift = 23 - mant_bits;
14037ec681f3Smrg   unsigned bias_shift = 127 - ((1 << (exp_bits - 1)) - 1);
14047ec681f3Smrg   LLVMValueRef shifted, normal;
14057ec681f3Smrg
14067ec681f3Smrg   shifted = LLVMBuildShl(ctx->builder, src, LLVMConstInt(ctx->i32, normal_shift, false), "");
14077ec681f3Smrg   normal =
14087ec681f3Smrg      LLVMBuildAdd(ctx->builder, shifted, LLVMConstInt(ctx->i32, bias_shift << 23, false), "");
14097ec681f3Smrg
14107ec681f3Smrg   /* Converting nan/inf numbers is the same, but with a different exponent update */
14117ec681f3Smrg   LLVMValueRef naninf;
14127ec681f3Smrg   naninf = LLVMBuildOr(ctx->builder, normal, LLVMConstInt(ctx->i32, 0xff << 23, false), "");
14137ec681f3Smrg
14147ec681f3Smrg   /* Converting denormals is the complex case: determine the leading zeros of the
14157ec681f3Smrg    * mantissa to obtain the correct shift for the mantissa and exponent correction.
14167ec681f3Smrg    */
14177ec681f3Smrg   LLVMValueRef denormal;
14187ec681f3Smrg   LLVMValueRef params[2] = {
14197ec681f3Smrg      mantissa, ctx->i1true, /* result can be undef when arg is 0 */
14207ec681f3Smrg   };
14217ec681f3Smrg   LLVMValueRef ctlz =
14227ec681f3Smrg      ac_build_intrinsic(ctx, "llvm.ctlz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
14237ec681f3Smrg
14247ec681f3Smrg   /* Shift such that the leading 1 ends up as the LSB of the exponent field. */
14257ec681f3Smrg   tmp = LLVMBuildSub(ctx->builder, ctlz, LLVMConstInt(ctx->i32, 8, false), "");
14267ec681f3Smrg   denormal = LLVMBuildShl(ctx->builder, mantissa, tmp, "");
14277ec681f3Smrg
14287ec681f3Smrg   unsigned denormal_exp = bias_shift + (32 - mant_bits) - 1;
14297ec681f3Smrg   tmp = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, denormal_exp, false), ctlz, "");
14307ec681f3Smrg   tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(ctx->i32, 23, false), "");
14317ec681f3Smrg   denormal = LLVMBuildAdd(ctx->builder, denormal, tmp, "");
14327ec681f3Smrg
14337ec681f3Smrg   /* Select the final result. */
14347ec681f3Smrg   LLVMValueRef result;
14357ec681f3Smrg
14367ec681f3Smrg   tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
14377ec681f3Smrg                       LLVMConstInt(ctx->i32, ((1ULL << exp_bits) - 1) << mant_bits, false), "");
14387ec681f3Smrg   result = LLVMBuildSelect(ctx->builder, tmp, naninf, normal, "");
14397ec681f3Smrg
14407ec681f3Smrg   tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
14417ec681f3Smrg                       LLVMConstInt(ctx->i32, 1ULL << mant_bits, false), "");
14427ec681f3Smrg   result = LLVMBuildSelect(ctx->builder, tmp, result, denormal, "");
14437ec681f3Smrg
14447ec681f3Smrg   tmp = LLVMBuildICmp(ctx->builder, LLVMIntNE, src, ctx->i32_0, "");
14457ec681f3Smrg   result = LLVMBuildSelect(ctx->builder, tmp, result, ctx->i32_0, "");
14467ec681f3Smrg
14477ec681f3Smrg   return ac_to_float(ctx, result);
14487ec681f3Smrg}
14497ec681f3Smrg
14507ec681f3Smrg/**
14517ec681f3Smrg * Generate a fully general open coded buffer format fetch with all required
14527ec681f3Smrg * fixups suitable for vertex fetch, using non-format buffer loads.
14537ec681f3Smrg *
14547ec681f3Smrg * Some combinations of argument values have special interpretations:
14557ec681f3Smrg * - size = 8 bytes, format = fixed indicates PIPE_FORMAT_R11G11B10_FLOAT
14567ec681f3Smrg * - size = 8 bytes, format != {float,fixed} indicates a 2_10_10_10 data format
14577ec681f3Smrg *
14587ec681f3Smrg * \param log_size log(size of channel in bytes)
14597ec681f3Smrg * \param num_channels number of channels (1 to 4)
14607ec681f3Smrg * \param format AC_FETCH_FORMAT_xxx value
14617ec681f3Smrg * \param reverse whether XYZ channels are reversed
14627ec681f3Smrg * \param known_aligned whether the source is known to be aligned to hardware's
14637ec681f3Smrg *                      effective element size for loading the given format
14647ec681f3Smrg *                      (note: this means dword alignment for 8_8_8_8, 16_16, etc.)
14657ec681f3Smrg * \param rsrc buffer resource descriptor
14667ec681f3Smrg * \return the resulting vector of floats or integers bitcast to <4 x i32>
14677ec681f3Smrg */
14687ec681f3SmrgLLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigned log_size,
14697ec681f3Smrg                                            unsigned num_channels, unsigned format, bool reverse,
14707ec681f3Smrg                                            bool known_aligned, LLVMValueRef rsrc,
14717ec681f3Smrg                                            LLVMValueRef vindex, LLVMValueRef voffset,
14727ec681f3Smrg                                            LLVMValueRef soffset, unsigned cache_policy,
14737ec681f3Smrg                                            bool can_speculate)
14747ec681f3Smrg{
14757ec681f3Smrg   LLVMValueRef tmp;
14767ec681f3Smrg   unsigned load_log_size = log_size;
14777ec681f3Smrg   unsigned load_num_channels = num_channels;
14787ec681f3Smrg   if (log_size == 3) {
14797ec681f3Smrg      load_log_size = 2;
14807ec681f3Smrg      if (format == AC_FETCH_FORMAT_FLOAT) {
14817ec681f3Smrg         load_num_channels = 2 * num_channels;
14827ec681f3Smrg      } else {
14837ec681f3Smrg         load_num_channels = 1; /* 10_11_11 or 2_10_10_10 */
14847ec681f3Smrg      }
14857ec681f3Smrg   }
14867ec681f3Smrg
14877ec681f3Smrg   int log_recombine = 0;
14887ec681f3Smrg   if ((ctx->chip_class == GFX6 || ctx->chip_class >= GFX10) && !known_aligned) {
14897ec681f3Smrg      /* Avoid alignment restrictions by loading one byte at a time. */
14907ec681f3Smrg      load_num_channels <<= load_log_size;
14917ec681f3Smrg      log_recombine = load_log_size;
14927ec681f3Smrg      load_log_size = 0;
14937ec681f3Smrg   } else if (load_num_channels == 2 || load_num_channels == 4) {
14947ec681f3Smrg      log_recombine = -util_logbase2(load_num_channels);
14957ec681f3Smrg      load_num_channels = 1;
14967ec681f3Smrg      load_log_size += -log_recombine;
14977ec681f3Smrg   }
14987ec681f3Smrg
14997ec681f3Smrg   LLVMValueRef loads[32]; /* up to 32 bytes */
15007ec681f3Smrg   for (unsigned i = 0; i < load_num_channels; ++i) {
15017ec681f3Smrg      tmp =
15027ec681f3Smrg         LLVMBuildAdd(ctx->builder, soffset, LLVMConstInt(ctx->i32, i << load_log_size, false), "");
15037ec681f3Smrg      LLVMTypeRef channel_type =
15047ec681f3Smrg         load_log_size == 0 ? ctx->i8 : load_log_size == 1 ? ctx->i16 : ctx->i32;
15057ec681f3Smrg      unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2);
15067ec681f3Smrg      loads[i] =
15077ec681f3Smrg         ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, tmp, num_channels, channel_type,
15087ec681f3Smrg                                     cache_policy, can_speculate, false, true);
15097ec681f3Smrg      if (load_log_size >= 2)
15107ec681f3Smrg         loads[i] = ac_to_integer(ctx, loads[i]);
15117ec681f3Smrg   }
15127ec681f3Smrg
15137ec681f3Smrg   if (log_recombine > 0) {
15147ec681f3Smrg      /* Recombine bytes if necessary (GFX6 only) */
15157ec681f3Smrg      LLVMTypeRef dst_type = log_recombine == 2 ? ctx->i32 : ctx->i16;
15167ec681f3Smrg
15177ec681f3Smrg      for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) {
15187ec681f3Smrg         LLVMValueRef accum = NULL;
15197ec681f3Smrg         for (unsigned i = 0; i < (1 << log_recombine); ++i, ++src) {
15207ec681f3Smrg            tmp = LLVMBuildZExt(ctx->builder, loads[src], dst_type, "");
15217ec681f3Smrg            if (i == 0) {
15227ec681f3Smrg               accum = tmp;
15237ec681f3Smrg            } else {
15247ec681f3Smrg               tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(dst_type, 8 * i, false), "");
15257ec681f3Smrg               accum = LLVMBuildOr(ctx->builder, accum, tmp, "");
15267ec681f3Smrg            }
15277ec681f3Smrg         }
15287ec681f3Smrg         loads[dst] = accum;
15297ec681f3Smrg      }
15307ec681f3Smrg   } else if (log_recombine < 0) {
15317ec681f3Smrg      /* Split vectors of dwords */
15327ec681f3Smrg      if (load_log_size > 2) {
15337ec681f3Smrg         assert(load_num_channels == 1);
15347ec681f3Smrg         LLVMValueRef loaded = loads[0];
15357ec681f3Smrg         unsigned log_split = load_log_size - 2;
15367ec681f3Smrg         log_recombine += log_split;
15377ec681f3Smrg         load_num_channels = 1 << log_split;
15387ec681f3Smrg         load_log_size = 2;
15397ec681f3Smrg         for (unsigned i = 0; i < load_num_channels; ++i) {
15407ec681f3Smrg            tmp = LLVMConstInt(ctx->i32, i, false);
15417ec681f3Smrg            loads[i] = LLVMBuildExtractElement(ctx->builder, loaded, tmp, "");
15427ec681f3Smrg         }
15437ec681f3Smrg      }
15447ec681f3Smrg
15457ec681f3Smrg      /* Further split dwords and shorts if required */
15467ec681f3Smrg      if (log_recombine < 0) {
15477ec681f3Smrg         for (unsigned src = load_num_channels, dst = load_num_channels << -log_recombine; src > 0;
15487ec681f3Smrg              --src) {
15497ec681f3Smrg            unsigned dst_bits = 1 << (3 + load_log_size + log_recombine);
15507ec681f3Smrg            LLVMTypeRef dst_type = LLVMIntTypeInContext(ctx->context, dst_bits);
15517ec681f3Smrg            LLVMValueRef loaded = loads[src - 1];
15527ec681f3Smrg            LLVMTypeRef loaded_type = LLVMTypeOf(loaded);
15537ec681f3Smrg            for (unsigned i = 1 << -log_recombine; i > 0; --i, --dst) {
15547ec681f3Smrg               tmp = LLVMConstInt(loaded_type, dst_bits * (i - 1), false);
15557ec681f3Smrg               tmp = LLVMBuildLShr(ctx->builder, loaded, tmp, "");
15567ec681f3Smrg               loads[dst - 1] = LLVMBuildTrunc(ctx->builder, tmp, dst_type, "");
15577ec681f3Smrg            }
15587ec681f3Smrg         }
15597ec681f3Smrg      }
15607ec681f3Smrg   }
15617ec681f3Smrg
15627ec681f3Smrg   if (log_size == 3) {
15637ec681f3Smrg      if (format == AC_FETCH_FORMAT_FLOAT) {
15647ec681f3Smrg         for (unsigned i = 0; i < num_channels; ++i) {
15657ec681f3Smrg            tmp = ac_build_gather_values(ctx, &loads[2 * i], 2);
15667ec681f3Smrg            loads[i] = LLVMBuildBitCast(ctx->builder, tmp, ctx->f64, "");
15677ec681f3Smrg         }
15687ec681f3Smrg      } else if (format == AC_FETCH_FORMAT_FIXED) {
15697ec681f3Smrg         /* 10_11_11_FLOAT */
15707ec681f3Smrg         LLVMValueRef data = loads[0];
15717ec681f3Smrg         LLVMValueRef i32_2047 = LLVMConstInt(ctx->i32, 2047, false);
15727ec681f3Smrg         LLVMValueRef r = LLVMBuildAnd(ctx->builder, data, i32_2047, "");
15737ec681f3Smrg         tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 11, false), "");
15747ec681f3Smrg         LLVMValueRef g = LLVMBuildAnd(ctx->builder, tmp, i32_2047, "");
15757ec681f3Smrg         LLVMValueRef b = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 22, false), "");
15767ec681f3Smrg
15777ec681f3Smrg         loads[0] = ac_to_integer(ctx, ac_ufN_to_float(ctx, r, 5, 6));
15787ec681f3Smrg         loads[1] = ac_to_integer(ctx, ac_ufN_to_float(ctx, g, 5, 6));
15797ec681f3Smrg         loads[2] = ac_to_integer(ctx, ac_ufN_to_float(ctx, b, 5, 5));
15807ec681f3Smrg
15817ec681f3Smrg         num_channels = 3;
15827ec681f3Smrg         log_size = 2;
15837ec681f3Smrg         format = AC_FETCH_FORMAT_FLOAT;
15847ec681f3Smrg      } else {
15857ec681f3Smrg         /* 2_10_10_10 data formats */
15867ec681f3Smrg         LLVMValueRef data = loads[0];
15877ec681f3Smrg         LLVMTypeRef i10 = LLVMIntTypeInContext(ctx->context, 10);
15887ec681f3Smrg         LLVMTypeRef i2 = LLVMIntTypeInContext(ctx->context, 2);
15897ec681f3Smrg         loads[0] = LLVMBuildTrunc(ctx->builder, data, i10, "");
15907ec681f3Smrg         tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 10, false), "");
15917ec681f3Smrg         loads[1] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
15927ec681f3Smrg         tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 20, false), "");
15937ec681f3Smrg         loads[2] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
15947ec681f3Smrg         tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 30, false), "");
15957ec681f3Smrg         loads[3] = LLVMBuildTrunc(ctx->builder, tmp, i2, "");
15967ec681f3Smrg
15977ec681f3Smrg         num_channels = 4;
15987ec681f3Smrg      }
15997ec681f3Smrg   }
16007ec681f3Smrg
16017ec681f3Smrg   if (format == AC_FETCH_FORMAT_FLOAT) {
16027ec681f3Smrg      if (log_size != 2) {
16037ec681f3Smrg         for (unsigned chan = 0; chan < num_channels; ++chan) {
16047ec681f3Smrg            tmp = ac_to_float(ctx, loads[chan]);
16057ec681f3Smrg            if (log_size == 3)
16067ec681f3Smrg               tmp = LLVMBuildFPTrunc(ctx->builder, tmp, ctx->f32, "");
16077ec681f3Smrg            else if (log_size == 1)
16087ec681f3Smrg               tmp = LLVMBuildFPExt(ctx->builder, tmp, ctx->f32, "");
16097ec681f3Smrg            loads[chan] = ac_to_integer(ctx, tmp);
16107ec681f3Smrg         }
16117ec681f3Smrg      }
16127ec681f3Smrg   } else if (format == AC_FETCH_FORMAT_UINT) {
16137ec681f3Smrg      if (log_size != 2) {
16147ec681f3Smrg         for (unsigned chan = 0; chan < num_channels; ++chan)
16157ec681f3Smrg            loads[chan] = LLVMBuildZExt(ctx->builder, loads[chan], ctx->i32, "");
16167ec681f3Smrg      }
16177ec681f3Smrg   } else if (format == AC_FETCH_FORMAT_SINT) {
16187ec681f3Smrg      if (log_size != 2) {
16197ec681f3Smrg         for (unsigned chan = 0; chan < num_channels; ++chan)
16207ec681f3Smrg            loads[chan] = LLVMBuildSExt(ctx->builder, loads[chan], ctx->i32, "");
16217ec681f3Smrg      }
16227ec681f3Smrg   } else {
16237ec681f3Smrg      bool unsign = format == AC_FETCH_FORMAT_UNORM || format == AC_FETCH_FORMAT_USCALED ||
16247ec681f3Smrg                    format == AC_FETCH_FORMAT_UINT;
16257ec681f3Smrg
16267ec681f3Smrg      for (unsigned chan = 0; chan < num_channels; ++chan) {
16277ec681f3Smrg         if (unsign) {
16287ec681f3Smrg            tmp = LLVMBuildUIToFP(ctx->builder, loads[chan], ctx->f32, "");
16297ec681f3Smrg         } else {
16307ec681f3Smrg            tmp = LLVMBuildSIToFP(ctx->builder, loads[chan], ctx->f32, "");
16317ec681f3Smrg         }
16327ec681f3Smrg
16337ec681f3Smrg         LLVMValueRef scale = NULL;
16347ec681f3Smrg         if (format == AC_FETCH_FORMAT_FIXED) {
16357ec681f3Smrg            assert(log_size == 2);
16367ec681f3Smrg            scale = LLVMConstReal(ctx->f32, 1.0 / 0x10000);
16377ec681f3Smrg         } else if (format == AC_FETCH_FORMAT_UNORM) {
16387ec681f3Smrg            unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
16397ec681f3Smrg            scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << bits) - 1));
16407ec681f3Smrg         } else if (format == AC_FETCH_FORMAT_SNORM) {
16417ec681f3Smrg            unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
16427ec681f3Smrg            scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << (bits - 1)) - 1));
16437ec681f3Smrg         }
16447ec681f3Smrg         if (scale)
16457ec681f3Smrg            tmp = LLVMBuildFMul(ctx->builder, tmp, scale, "");
16467ec681f3Smrg
16477ec681f3Smrg         if (format == AC_FETCH_FORMAT_SNORM) {
16487ec681f3Smrg            /* Clamp to [-1, 1] */
16497ec681f3Smrg            LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
16507ec681f3Smrg            LLVMValueRef clamp = LLVMBuildFCmp(ctx->builder, LLVMRealULT, tmp, neg_one, "");
16517ec681f3Smrg            tmp = LLVMBuildSelect(ctx->builder, clamp, neg_one, tmp, "");
16527ec681f3Smrg         }
16537ec681f3Smrg
16547ec681f3Smrg         loads[chan] = ac_to_integer(ctx, tmp);
16557ec681f3Smrg      }
16567ec681f3Smrg   }
16577ec681f3Smrg
16587ec681f3Smrg   while (num_channels < 4) {
16597ec681f3Smrg      if (format == AC_FETCH_FORMAT_UINT || format == AC_FETCH_FORMAT_SINT) {
16607ec681f3Smrg         loads[num_channels] = num_channels == 3 ? ctx->i32_1 : ctx->i32_0;
16617ec681f3Smrg      } else {
16627ec681f3Smrg         loads[num_channels] = ac_to_integer(ctx, num_channels == 3 ? ctx->f32_1 : ctx->f32_0);
16637ec681f3Smrg      }
16647ec681f3Smrg      num_channels++;
16657ec681f3Smrg   }
16667ec681f3Smrg
16677ec681f3Smrg   if (reverse) {
16687ec681f3Smrg      tmp = loads[0];
16697ec681f3Smrg      loads[0] = loads[2];
16707ec681f3Smrg      loads[2] = tmp;
16717ec681f3Smrg   }
16727ec681f3Smrg
16737ec681f3Smrg   return ac_build_gather_values(ctx, loads, 4);
16747ec681f3Smrg}
16757ec681f3Smrg
16767ec681f3Smrgstatic void ac_build_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
16777ec681f3Smrg                                   LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset,
16787ec681f3Smrg                                   LLVMValueRef soffset, LLVMValueRef immoffset,
16797ec681f3Smrg                                   unsigned num_channels, unsigned dfmt, unsigned nfmt,
16807ec681f3Smrg                                   unsigned cache_policy, bool structurized)
16817ec681f3Smrg{
16827ec681f3Smrg   voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0, immoffset, "");
16837ec681f3Smrg
16847ec681f3Smrg   LLVMValueRef args[7];
16857ec681f3Smrg   int idx = 0;
16867ec681f3Smrg   args[idx++] = vdata;
16877ec681f3Smrg   args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
16887ec681f3Smrg   if (structurized)
16897ec681f3Smrg      args[idx++] = vindex ? vindex : ctx->i32_0;
16907ec681f3Smrg   args[idx++] = voffset ? voffset : ctx->i32_0;
16917ec681f3Smrg   args[idx++] = soffset ? soffset : ctx->i32_0;
16927ec681f3Smrg   args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
16937ec681f3Smrg   args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
16947ec681f3Smrg   unsigned func =
16957ec681f3Smrg      !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
16967ec681f3Smrg   const char *indexing_kind = structurized ? "struct" : "raw";
16977ec681f3Smrg   char name[256], type_name[8];
16987ec681f3Smrg
16997ec681f3Smrg   LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
17007ec681f3Smrg   ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
17017ec681f3Smrg
17027ec681f3Smrg   snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s", indexing_kind, type_name);
17037ec681f3Smrg
17047ec681f3Smrg   ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
17057ec681f3Smrg}
17067ec681f3Smrg
17077ec681f3Smrgvoid ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
17087ec681f3Smrg                                   LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset,
17097ec681f3Smrg                                   LLVMValueRef soffset, LLVMValueRef immoffset,
17107ec681f3Smrg                                   unsigned num_channels, unsigned dfmt, unsigned nfmt,
17117ec681f3Smrg                                   unsigned cache_policy)
17127ec681f3Smrg{
17137ec681f3Smrg   ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset, immoffset, num_channels, dfmt,
17147ec681f3Smrg                          nfmt, cache_policy, true);
17157ec681f3Smrg}
17167ec681f3Smrg
17177ec681f3Smrgvoid ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
17187ec681f3Smrg                                LLVMValueRef voffset, LLVMValueRef soffset, LLVMValueRef immoffset,
17197ec681f3Smrg                                unsigned num_channels, unsigned dfmt, unsigned nfmt,
17207ec681f3Smrg                                unsigned cache_policy)
17217ec681f3Smrg{
17227ec681f3Smrg   ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset, immoffset, num_channels, dfmt,
17237ec681f3Smrg                          nfmt, cache_policy, false);
17247ec681f3Smrg}
17257ec681f3Smrg
17267ec681f3Smrgvoid ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
17277ec681f3Smrg                                  LLVMValueRef vdata, LLVMValueRef voffset, LLVMValueRef soffset,
17287ec681f3Smrg                                  unsigned cache_policy)
17297ec681f3Smrg{
17307ec681f3Smrg   vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
17317ec681f3Smrg
17327ec681f3Smrg   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false,
17337ec681f3Smrg                                false);
17347ec681f3Smrg}
17357ec681f3Smrg
17367ec681f3Smrgvoid ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
17377ec681f3Smrg                                 LLVMValueRef voffset, LLVMValueRef soffset, unsigned cache_policy)
17387ec681f3Smrg{
17397ec681f3Smrg   vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
17407ec681f3Smrg
17417ec681f3Smrg   ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false,
17427ec681f3Smrg                                false);
17437ec681f3Smrg}
17447ec681f3Smrg
17457ec681f3Smrg/**
17467ec681f3Smrg * Set range metadata on an instruction.  This can only be used on load and
17477ec681f3Smrg * call instructions.  If you know an instruction can only produce the values
17487ec681f3Smrg * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
17497ec681f3Smrg * \p lo is the minimum value inclusive.
17507ec681f3Smrg * \p hi is the maximum value exclusive.
17517ec681f3Smrg */
17527ec681f3Smrgvoid ac_set_range_metadata(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned lo,
17537ec681f3Smrg                           unsigned hi)
17547ec681f3Smrg{
17557ec681f3Smrg   LLVMValueRef range_md, md_args[2];
17567ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(value);
17577ec681f3Smrg   LLVMContextRef context = LLVMGetTypeContext(type);
17587ec681f3Smrg
17597ec681f3Smrg   md_args[0] = LLVMConstInt(type, lo, false);
17607ec681f3Smrg   md_args[1] = LLVMConstInt(type, hi, false);
17617ec681f3Smrg   range_md = LLVMMDNodeInContext(context, md_args, 2);
17627ec681f3Smrg   LLVMSetMetadata(value, ctx->range_md_kind, range_md);
17637ec681f3Smrg}
17647ec681f3Smrg
17657ec681f3SmrgLLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx)
17667ec681f3Smrg{
17677ec681f3Smrg   return ac_build_mbcnt(ctx, LLVMConstInt(ctx->iN_wavemask, ~0ull, 0));
17687ec681f3Smrg}
17697ec681f3Smrg
17707ec681f3Smrg/*
17717ec681f3Smrg * AMD GCN implements derivatives using the local data store (LDS)
17727ec681f3Smrg * All writes to the LDS happen in all executing threads at
17737ec681f3Smrg * the same time. TID is the Thread ID for the current
17747ec681f3Smrg * thread and is a value between 0 and 63, representing
17757ec681f3Smrg * the thread's position in the wavefront.
17767ec681f3Smrg *
17777ec681f3Smrg * For the pixel shader threads are grouped into quads of four pixels.
17787ec681f3Smrg * The TIDs of the pixels of a quad are:
17797ec681f3Smrg *
17807ec681f3Smrg *  +------+------+
17817ec681f3Smrg *  |4n + 0|4n + 1|
17827ec681f3Smrg *  +------+------+
17837ec681f3Smrg *  |4n + 2|4n + 3|
17847ec681f3Smrg *  +------+------+
17857ec681f3Smrg *
17867ec681f3Smrg * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
17877ec681f3Smrg * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
17887ec681f3Smrg * the current pixel's column, and masking with 0xfffffffe yields the TID
17897ec681f3Smrg * of the left pixel of the current pixel's row.
17907ec681f3Smrg *
17917ec681f3Smrg * Adding 1 yields the TID of the pixel to the right of the left pixel, and
17927ec681f3Smrg * adding 2 yields the TID of the pixel below the top pixel.
17937ec681f3Smrg */
17947ec681f3SmrgLLVMValueRef ac_build_ddxy(struct ac_llvm_context *ctx, uint32_t mask, int idx, LLVMValueRef val)
17957ec681f3Smrg{
17967ec681f3Smrg   unsigned tl_lanes[4], trbl_lanes[4];
17977ec681f3Smrg   char name[32], type[8];
17987ec681f3Smrg   LLVMValueRef tl, trbl;
17997ec681f3Smrg   LLVMTypeRef result_type;
18007ec681f3Smrg   LLVMValueRef result;
18017ec681f3Smrg
18027ec681f3Smrg   result_type = ac_to_float_type(ctx, LLVMTypeOf(val));
18037ec681f3Smrg
18047ec681f3Smrg   if (result_type == ctx->f16)
18057ec681f3Smrg      val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
18067ec681f3Smrg   else if (result_type == ctx->v2f16)
18077ec681f3Smrg      val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, "");
18087ec681f3Smrg
18097ec681f3Smrg   for (unsigned i = 0; i < 4; ++i) {
18107ec681f3Smrg      tl_lanes[i] = i & mask;
18117ec681f3Smrg      trbl_lanes[i] = (i & mask) + idx;
18127ec681f3Smrg   }
18137ec681f3Smrg
18147ec681f3Smrg   tl = ac_build_quad_swizzle(ctx, val, tl_lanes[0], tl_lanes[1], tl_lanes[2], tl_lanes[3]);
18157ec681f3Smrg   trbl =
18167ec681f3Smrg      ac_build_quad_swizzle(ctx, val, trbl_lanes[0], trbl_lanes[1], trbl_lanes[2], trbl_lanes[3]);
18177ec681f3Smrg
18187ec681f3Smrg   if (result_type == ctx->f16) {
18197ec681f3Smrg      tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, "");
18207ec681f3Smrg      trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, "");
18217ec681f3Smrg   }
18227ec681f3Smrg
18237ec681f3Smrg   tl = LLVMBuildBitCast(ctx->builder, tl, result_type, "");
18247ec681f3Smrg   trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, "");
18257ec681f3Smrg   result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
18267ec681f3Smrg
18277ec681f3Smrg   ac_build_type_name_for_intr(result_type, type, sizeof(type));
18287ec681f3Smrg   snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type);
18297ec681f3Smrg
18307ec681f3Smrg   return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0);
18317ec681f3Smrg}
18327ec681f3Smrg
18337ec681f3Smrgvoid ac_build_sendmsg(struct ac_llvm_context *ctx, uint32_t msg, LLVMValueRef wave_id)
18347ec681f3Smrg{
18357ec681f3Smrg   LLVMValueRef args[2];
18367ec681f3Smrg   args[0] = LLVMConstInt(ctx->i32, msg, false);
18377ec681f3Smrg   args[1] = wave_id;
18387ec681f3Smrg   ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0);
18397ec681f3Smrg}
18407ec681f3Smrg
18417ec681f3SmrgLLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type)
18427ec681f3Smrg{
18437ec681f3Smrg   LLVMValueRef msb =
18447ec681f3Smrg      ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32", dst_type, &arg, 1, AC_FUNC_ATTR_READNONE);
18457ec681f3Smrg
18467ec681f3Smrg   /* The HW returns the last bit index from MSB, but NIR/TGSI wants
18477ec681f3Smrg    * the index from LSB. Invert it by doing "31 - msb". */
18487ec681f3Smrg   msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), msb, "");
18497ec681f3Smrg
18507ec681f3Smrg   LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
18517ec681f3Smrg   LLVMValueRef cond =
18527ec681f3Smrg      LLVMBuildOr(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, ctx->i32_0, ""),
18537ec681f3Smrg                  LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, all_ones, ""), "");
18547ec681f3Smrg
18557ec681f3Smrg   return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
18567ec681f3Smrg}
18577ec681f3Smrg
18587ec681f3SmrgLLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type)
18597ec681f3Smrg{
18607ec681f3Smrg   const char *intrin_name;
18617ec681f3Smrg   LLVMTypeRef type;
18627ec681f3Smrg   LLVMValueRef highest_bit;
18637ec681f3Smrg   LLVMValueRef zero;
18647ec681f3Smrg   unsigned bitsize;
18657ec681f3Smrg
18667ec681f3Smrg   bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
18677ec681f3Smrg   switch (bitsize) {
18687ec681f3Smrg   case 64:
18697ec681f3Smrg      intrin_name = "llvm.ctlz.i64";
18707ec681f3Smrg      type = ctx->i64;
18717ec681f3Smrg      highest_bit = LLVMConstInt(ctx->i64, 63, false);
18727ec681f3Smrg      zero = ctx->i64_0;
18737ec681f3Smrg      break;
18747ec681f3Smrg   case 32:
18757ec681f3Smrg      intrin_name = "llvm.ctlz.i32";
18767ec681f3Smrg      type = ctx->i32;
18777ec681f3Smrg      highest_bit = LLVMConstInt(ctx->i32, 31, false);
18787ec681f3Smrg      zero = ctx->i32_0;
18797ec681f3Smrg      break;
18807ec681f3Smrg   case 16:
18817ec681f3Smrg      intrin_name = "llvm.ctlz.i16";
18827ec681f3Smrg      type = ctx->i16;
18837ec681f3Smrg      highest_bit = LLVMConstInt(ctx->i16, 15, false);
18847ec681f3Smrg      zero = ctx->i16_0;
18857ec681f3Smrg      break;
18867ec681f3Smrg   case 8:
18877ec681f3Smrg      intrin_name = "llvm.ctlz.i8";
18887ec681f3Smrg      type = ctx->i8;
18897ec681f3Smrg      highest_bit = LLVMConstInt(ctx->i8, 7, false);
18907ec681f3Smrg      zero = ctx->i8_0;
18917ec681f3Smrg      break;
18927ec681f3Smrg   default:
18937ec681f3Smrg      unreachable(!"invalid bitsize");
18947ec681f3Smrg      break;
18957ec681f3Smrg   }
18967ec681f3Smrg
18977ec681f3Smrg   LLVMValueRef params[2] = {
18987ec681f3Smrg      arg,
18997ec681f3Smrg      ctx->i1true,
19007ec681f3Smrg   };
19017ec681f3Smrg
19027ec681f3Smrg   LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE);
19037ec681f3Smrg
19047ec681f3Smrg   /* The HW returns the last bit index from MSB, but TGSI/NIR wants
19057ec681f3Smrg    * the index from LSB. Invert it by doing "31 - msb". */
19067ec681f3Smrg   msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
19077ec681f3Smrg
19087ec681f3Smrg   if (bitsize == 64) {
19097ec681f3Smrg      msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, "");
19107ec681f3Smrg   } else if (bitsize < 32) {
19117ec681f3Smrg      msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, "");
19127ec681f3Smrg   }
19137ec681f3Smrg
19147ec681f3Smrg   /* check for zero */
19157ec681f3Smrg   return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""),
19167ec681f3Smrg                          LLVMConstInt(ctx->i32, -1, true), msb, "");
19177ec681f3Smrg}
19187ec681f3Smrg
19197ec681f3SmrgLLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
19207ec681f3Smrg{
19217ec681f3Smrg   char name[64], type[64];
19227ec681f3Smrg
19237ec681f3Smrg   ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type));
19247ec681f3Smrg   snprintf(name, sizeof(name), "llvm.minnum.%s", type);
19257ec681f3Smrg   LLVMValueRef args[2] = {a, b};
19267ec681f3Smrg   return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, AC_FUNC_ATTR_READNONE);
19277ec681f3Smrg}
19287ec681f3Smrg
19297ec681f3SmrgLLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
19307ec681f3Smrg{
19317ec681f3Smrg   char name[64], type[64];
19327ec681f3Smrg
19337ec681f3Smrg   ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type));
19347ec681f3Smrg   snprintf(name, sizeof(name), "llvm.maxnum.%s", type);
19357ec681f3Smrg   LLVMValueRef args[2] = {a, b};
19367ec681f3Smrg   return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, AC_FUNC_ATTR_READNONE);
19377ec681f3Smrg}
19387ec681f3Smrg
19397ec681f3SmrgLLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
19407ec681f3Smrg{
19417ec681f3Smrg   LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, "");
19427ec681f3Smrg   return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
19437ec681f3Smrg}
19447ec681f3Smrg
19457ec681f3SmrgLLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
19467ec681f3Smrg{
19477ec681f3Smrg   LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, "");
19487ec681f3Smrg   return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
19497ec681f3Smrg}
19507ec681f3Smrg
19517ec681f3SmrgLLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
19527ec681f3Smrg{
19537ec681f3Smrg   LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
19547ec681f3Smrg   return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
19557ec681f3Smrg}
19567ec681f3Smrg
19577ec681f3SmrgLLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
19587ec681f3Smrg{
19597ec681f3Smrg   LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, "");
19607ec681f3Smrg   return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
19617ec681f3Smrg}
19627ec681f3Smrg
19637ec681f3SmrgLLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
19647ec681f3Smrg{
19657ec681f3Smrg   LLVMTypeRef t = LLVMTypeOf(value);
19667ec681f3Smrg   return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)),
19677ec681f3Smrg                        LLVMConstReal(t, 1.0));
19687ec681f3Smrg}
19697ec681f3Smrg
19707ec681f3Smrgvoid ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
19717ec681f3Smrg{
19727ec681f3Smrg   LLVMValueRef args[9];
19737ec681f3Smrg
19747ec681f3Smrg   args[0] = LLVMConstInt(ctx->i32, a->target, 0);
19757ec681f3Smrg   args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
19767ec681f3Smrg
19777ec681f3Smrg   if (a->compr) {
19787ec681f3Smrg      args[2] = LLVMBuildBitCast(ctx->builder, a->out[0], ctx->v2i16, "");
19797ec681f3Smrg      args[3] = LLVMBuildBitCast(ctx->builder, a->out[1], ctx->v2i16, "");
19807ec681f3Smrg      args[4] = LLVMConstInt(ctx->i1, a->done, 0);
19817ec681f3Smrg      args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
19827ec681f3Smrg
19837ec681f3Smrg      ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16", ctx->voidt, args, 6, 0);
19847ec681f3Smrg   } else {
19857ec681f3Smrg      args[2] = a->out[0];
19867ec681f3Smrg      args[3] = a->out[1];
19877ec681f3Smrg      args[4] = a->out[2];
19887ec681f3Smrg      args[5] = a->out[3];
19897ec681f3Smrg      args[6] = LLVMConstInt(ctx->i1, a->done, 0);
19907ec681f3Smrg      args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
19917ec681f3Smrg
19927ec681f3Smrg      ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", ctx->voidt, args, 8, 0);
19937ec681f3Smrg   }
19947ec681f3Smrg}
19957ec681f3Smrg
19967ec681f3Smrgvoid ac_build_export_null(struct ac_llvm_context *ctx)
19977ec681f3Smrg{
19987ec681f3Smrg   struct ac_export_args args;
19997ec681f3Smrg
20007ec681f3Smrg   args.enabled_channels = 0x0; /* enabled channels */
20017ec681f3Smrg   args.valid_mask = 1;         /* whether the EXEC mask is valid */
20027ec681f3Smrg   args.done = 1;               /* DONE bit */
20037ec681f3Smrg   args.target = V_008DFC_SQ_EXP_NULL;
20047ec681f3Smrg   args.compr = 0;                       /* COMPR flag (0 = 32-bit export) */
20057ec681f3Smrg   args.out[0] = LLVMGetUndef(ctx->f32); /* R */
20067ec681f3Smrg   args.out[1] = LLVMGetUndef(ctx->f32); /* G */
20077ec681f3Smrg   args.out[2] = LLVMGetUndef(ctx->f32); /* B */
20087ec681f3Smrg   args.out[3] = LLVMGetUndef(ctx->f32); /* A */
20097ec681f3Smrg
20107ec681f3Smrg   ac_build_export(ctx, &args);
20117ec681f3Smrg}
20127ec681f3Smrg
20137ec681f3Smrgstatic unsigned ac_num_coords(enum ac_image_dim dim)
20147ec681f3Smrg{
20157ec681f3Smrg   switch (dim) {
20167ec681f3Smrg   case ac_image_1d:
20177ec681f3Smrg      return 1;
20187ec681f3Smrg   case ac_image_2d:
20197ec681f3Smrg   case ac_image_1darray:
20207ec681f3Smrg      return 2;
20217ec681f3Smrg   case ac_image_3d:
20227ec681f3Smrg   case ac_image_cube:
20237ec681f3Smrg   case ac_image_2darray:
20247ec681f3Smrg   case ac_image_2dmsaa:
20257ec681f3Smrg      return 3;
20267ec681f3Smrg   case ac_image_2darraymsaa:
20277ec681f3Smrg      return 4;
20287ec681f3Smrg   default:
20297ec681f3Smrg      unreachable("ac_num_coords: bad dim");
20307ec681f3Smrg   }
20317ec681f3Smrg}
20327ec681f3Smrg
20337ec681f3Smrgstatic unsigned ac_num_derivs(enum ac_image_dim dim)
20347ec681f3Smrg{
20357ec681f3Smrg   switch (dim) {
20367ec681f3Smrg   case ac_image_1d:
20377ec681f3Smrg   case ac_image_1darray:
20387ec681f3Smrg      return 2;
20397ec681f3Smrg   case ac_image_2d:
20407ec681f3Smrg   case ac_image_2darray:
20417ec681f3Smrg   case ac_image_cube:
20427ec681f3Smrg      return 4;
20437ec681f3Smrg   case ac_image_3d:
20447ec681f3Smrg      return 6;
20457ec681f3Smrg   case ac_image_2dmsaa:
20467ec681f3Smrg   case ac_image_2darraymsaa:
20477ec681f3Smrg   default:
20487ec681f3Smrg      unreachable("derivatives not supported");
20497ec681f3Smrg   }
20507ec681f3Smrg}
20517ec681f3Smrg
20527ec681f3Smrgstatic const char *get_atomic_name(enum ac_atomic_op op)
20537ec681f3Smrg{
20547ec681f3Smrg   switch (op) {
20557ec681f3Smrg   case ac_atomic_swap:
20567ec681f3Smrg      return "swap";
20577ec681f3Smrg   case ac_atomic_add:
20587ec681f3Smrg      return "add";
20597ec681f3Smrg   case ac_atomic_sub:
20607ec681f3Smrg      return "sub";
20617ec681f3Smrg   case ac_atomic_smin:
20627ec681f3Smrg      return "smin";
20637ec681f3Smrg   case ac_atomic_umin:
20647ec681f3Smrg      return "umin";
20657ec681f3Smrg   case ac_atomic_smax:
20667ec681f3Smrg      return "smax";
20677ec681f3Smrg   case ac_atomic_umax:
20687ec681f3Smrg      return "umax";
20697ec681f3Smrg   case ac_atomic_and:
20707ec681f3Smrg      return "and";
20717ec681f3Smrg   case ac_atomic_or:
20727ec681f3Smrg      return "or";
20737ec681f3Smrg   case ac_atomic_xor:
20747ec681f3Smrg      return "xor";
20757ec681f3Smrg   case ac_atomic_inc_wrap:
20767ec681f3Smrg      return "inc";
20777ec681f3Smrg   case ac_atomic_dec_wrap:
20787ec681f3Smrg      return "dec";
20797ec681f3Smrg   case ac_atomic_fmin:
20807ec681f3Smrg      return "fmin";
20817ec681f3Smrg   case ac_atomic_fmax:
20827ec681f3Smrg      return "fmax";
20837ec681f3Smrg   }
20847ec681f3Smrg   unreachable("bad atomic op");
20857ec681f3Smrg}
20867ec681f3Smrg
20877ec681f3SmrgLLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a)
20887ec681f3Smrg{
20897ec681f3Smrg   const char *overload[3] = {"", "", ""};
20907ec681f3Smrg   unsigned num_overloads = 0;
20917ec681f3Smrg   LLVMValueRef args[18];
20927ec681f3Smrg   unsigned num_args = 0;
20937ec681f3Smrg   enum ac_image_dim dim = a->dim;
20947ec681f3Smrg
20957ec681f3Smrg   assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 || !a->level_zero);
20967ec681f3Smrg   assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
20977ec681f3Smrg           a->opcode != ac_image_store_mip) ||
20987ec681f3Smrg          a->lod);
20997ec681f3Smrg   assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
21007ec681f3Smrg          (!a->compare && !a->offset));
21017ec681f3Smrg   assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
21027ec681f3Smrg           a->opcode == ac_image_get_lod) ||
21037ec681f3Smrg          !a->bias);
21047ec681f3Smrg   assert((a->bias ? 1 : 0) + (a->lod ? 1 : 0) + (a->level_zero ? 1 : 0) + (a->derivs[0] ? 1 : 0) <=
21057ec681f3Smrg          1);
21067ec681f3Smrg   assert((a->min_lod ? 1 : 0) + (a->lod ? 1 : 0) + (a->level_zero ? 1 : 0) <= 1);
21077ec681f3Smrg   assert(!a->d16 || (ctx->chip_class >= GFX8 && a->opcode != ac_image_atomic &&
21087ec681f3Smrg                      a->opcode != ac_image_atomic_cmpswap && a->opcode != ac_image_get_lod &&
21097ec681f3Smrg                      a->opcode != ac_image_get_resinfo));
21107ec681f3Smrg   assert(!a->a16 || ctx->chip_class >= GFX9);
21117ec681f3Smrg   assert(a->g16 == a->a16 || ctx->chip_class >= GFX10);
21127ec681f3Smrg
21137ec681f3Smrg   assert(!a->offset ||
21147ec681f3Smrg          ac_get_elem_bits(ctx, LLVMTypeOf(a->offset)) == 32);
21157ec681f3Smrg   assert(!a->bias ||
21167ec681f3Smrg          ac_get_elem_bits(ctx, LLVMTypeOf(a->bias)) == 32);
21177ec681f3Smrg   assert(!a->compare ||
21187ec681f3Smrg          ac_get_elem_bits(ctx, LLVMTypeOf(a->compare)) == 32);
21197ec681f3Smrg   assert(!a->derivs[0] ||
21207ec681f3Smrg          ((!a->g16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->derivs[0])) == 16) &&
21217ec681f3Smrg           (a->g16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->derivs[0])) == 32)));
21227ec681f3Smrg   assert(!a->coords[0] ||
21237ec681f3Smrg          ((!a->a16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])) == 16) &&
21247ec681f3Smrg           (a->a16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])) == 32)));
21257ec681f3Smrg   assert(!a->lod ||
21267ec681f3Smrg          ((a->opcode != ac_image_get_resinfo || ac_get_elem_bits(ctx, LLVMTypeOf(a->lod))) &&
21277ec681f3Smrg           (a->opcode == ac_image_get_resinfo ||
21287ec681f3Smrg            ac_get_elem_bits(ctx, LLVMTypeOf(a->lod)) ==
21297ec681f3Smrg            ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])))));
21307ec681f3Smrg   assert(!a->min_lod ||
21317ec681f3Smrg          ac_get_elem_bits(ctx, LLVMTypeOf(a->min_lod)) ==
21327ec681f3Smrg          ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])));
21337ec681f3Smrg
21347ec681f3Smrg   if (a->opcode == ac_image_get_lod) {
21357ec681f3Smrg      switch (dim) {
21367ec681f3Smrg      case ac_image_1darray:
21377ec681f3Smrg         dim = ac_image_1d;
21387ec681f3Smrg         break;
21397ec681f3Smrg      case ac_image_2darray:
21407ec681f3Smrg      case ac_image_cube:
21417ec681f3Smrg         dim = ac_image_2d;
21427ec681f3Smrg         break;
21437ec681f3Smrg      default:
21447ec681f3Smrg         break;
21457ec681f3Smrg      }
21467ec681f3Smrg   }
21477ec681f3Smrg
21487ec681f3Smrg   bool sample = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
21497ec681f3Smrg                 a->opcode == ac_image_get_lod;
21507ec681f3Smrg   bool atomic = a->opcode == ac_image_atomic || a->opcode == ac_image_atomic_cmpswap;
21517ec681f3Smrg   bool load = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
21527ec681f3Smrg               a->opcode == ac_image_load || a->opcode == ac_image_load_mip;
21537ec681f3Smrg   LLVMTypeRef coord_type = sample ? (a->a16 ? ctx->f16 : ctx->f32) : (a->a16 ? ctx->i16 : ctx->i32);
21547ec681f3Smrg   uint8_t dmask = a->dmask;
21557ec681f3Smrg   LLVMTypeRef data_type;
21567ec681f3Smrg   char data_type_str[32];
21577ec681f3Smrg
21587ec681f3Smrg   if (atomic) {
21597ec681f3Smrg      data_type = LLVMTypeOf(a->data[0]);
21607ec681f3Smrg   } else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
21617ec681f3Smrg      /* Image stores might have been shrinked using the format. */
21627ec681f3Smrg      data_type = LLVMTypeOf(a->data[0]);
21637ec681f3Smrg      dmask = (1 << ac_get_llvm_num_components(a->data[0])) - 1;
21647ec681f3Smrg   } else {
21657ec681f3Smrg      data_type = a->d16 ? ctx->v4f16 : ctx->v4f32;
21667ec681f3Smrg   }
21677ec681f3Smrg
21687ec681f3Smrg   if (a->tfe) {
21697ec681f3Smrg      data_type = LLVMStructTypeInContext(
21707ec681f3Smrg         ctx->context, (LLVMTypeRef[]){data_type, ctx->i32}, 2, false);
21717ec681f3Smrg   }
21727ec681f3Smrg
21737ec681f3Smrg   if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
21747ec681f3Smrg      args[num_args++] = a->data[0];
21757ec681f3Smrg      if (a->opcode == ac_image_atomic_cmpswap)
21767ec681f3Smrg         args[num_args++] = a->data[1];
21777ec681f3Smrg   }
21787ec681f3Smrg
21797ec681f3Smrg   if (!atomic)
21807ec681f3Smrg      args[num_args++] = LLVMConstInt(ctx->i32, dmask, false);
21817ec681f3Smrg
21827ec681f3Smrg   if (a->offset)
21837ec681f3Smrg      args[num_args++] = ac_to_integer(ctx, a->offset);
21847ec681f3Smrg   if (a->bias) {
21857ec681f3Smrg      args[num_args++] = ac_to_float(ctx, a->bias);
21867ec681f3Smrg      overload[num_overloads++] = ".f32";
21877ec681f3Smrg   }
21887ec681f3Smrg   if (a->compare)
21897ec681f3Smrg      args[num_args++] = ac_to_float(ctx, a->compare);
21907ec681f3Smrg   if (a->derivs[0]) {
21917ec681f3Smrg      unsigned count = ac_num_derivs(dim);
21927ec681f3Smrg      for (unsigned i = 0; i < count; ++i)
21937ec681f3Smrg         args[num_args++] = ac_to_float(ctx, a->derivs[i]);
21947ec681f3Smrg      overload[num_overloads++] = a->g16 ? ".f16" : ".f32";
21957ec681f3Smrg   }
21967ec681f3Smrg   unsigned num_coords = a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0;
21977ec681f3Smrg   for (unsigned i = 0; i < num_coords; ++i)
21987ec681f3Smrg      args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
21997ec681f3Smrg   if (a->lod)
22007ec681f3Smrg      args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
22017ec681f3Smrg   if (a->min_lod)
22027ec681f3Smrg      args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, "");
22037ec681f3Smrg
22047ec681f3Smrg   overload[num_overloads++] = sample ? (a->a16 ? ".f16" : ".f32") : (a->a16 ? ".i16" : ".i32");
22057ec681f3Smrg
22067ec681f3Smrg   args[num_args++] = a->resource;
22077ec681f3Smrg   if (sample) {
22087ec681f3Smrg      args[num_args++] = a->sampler;
22097ec681f3Smrg      args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false);
22107ec681f3Smrg   }
22117ec681f3Smrg
22127ec681f3Smrg   args[num_args++] = a->tfe ? ctx->i32_1 : ctx->i32_0; /* texfailctrl */
22137ec681f3Smrg   args[num_args++] = LLVMConstInt(
22147ec681f3Smrg      ctx->i32, load ? get_load_cache_policy(ctx, a->cache_policy) : a->cache_policy, false);
22157ec681f3Smrg
22167ec681f3Smrg   const char *name;
22177ec681f3Smrg   const char *atomic_subop = "";
22187ec681f3Smrg   switch (a->opcode) {
22197ec681f3Smrg   case ac_image_sample:
22207ec681f3Smrg      name = "sample";
22217ec681f3Smrg      break;
22227ec681f3Smrg   case ac_image_gather4:
22237ec681f3Smrg      name = "gather4";
22247ec681f3Smrg      break;
22257ec681f3Smrg   case ac_image_load:
22267ec681f3Smrg      name = "load";
22277ec681f3Smrg      break;
22287ec681f3Smrg   case ac_image_load_mip:
22297ec681f3Smrg      name = "load.mip";
22307ec681f3Smrg      break;
22317ec681f3Smrg   case ac_image_store:
22327ec681f3Smrg      name = "store";
22337ec681f3Smrg      break;
22347ec681f3Smrg   case ac_image_store_mip:
22357ec681f3Smrg      name = "store.mip";
22367ec681f3Smrg      break;
22377ec681f3Smrg   case ac_image_atomic:
22387ec681f3Smrg      name = "atomic.";
22397ec681f3Smrg      atomic_subop = get_atomic_name(a->atomic);
22407ec681f3Smrg      break;
22417ec681f3Smrg   case ac_image_atomic_cmpswap:
22427ec681f3Smrg      name = "atomic.";
22437ec681f3Smrg      atomic_subop = "cmpswap";
22447ec681f3Smrg      break;
22457ec681f3Smrg   case ac_image_get_lod:
22467ec681f3Smrg      name = "getlod";
22477ec681f3Smrg      break;
22487ec681f3Smrg   case ac_image_get_resinfo:
22497ec681f3Smrg      name = "getresinfo";
22507ec681f3Smrg      break;
22517ec681f3Smrg   default:
22527ec681f3Smrg      unreachable("invalid image opcode");
22537ec681f3Smrg   }
22547ec681f3Smrg
22557ec681f3Smrg   const char *dimname;
22567ec681f3Smrg   switch (dim) {
22577ec681f3Smrg   case ac_image_1d:
22587ec681f3Smrg      dimname = "1d";
22597ec681f3Smrg      break;
22607ec681f3Smrg   case ac_image_2d:
22617ec681f3Smrg      dimname = "2d";
22627ec681f3Smrg      break;
22637ec681f3Smrg   case ac_image_3d:
22647ec681f3Smrg      dimname = "3d";
22657ec681f3Smrg      break;
22667ec681f3Smrg   case ac_image_cube:
22677ec681f3Smrg      dimname = "cube";
22687ec681f3Smrg      break;
22697ec681f3Smrg   case ac_image_1darray:
22707ec681f3Smrg      dimname = "1darray";
22717ec681f3Smrg      break;
22727ec681f3Smrg   case ac_image_2darray:
22737ec681f3Smrg      dimname = "2darray";
22747ec681f3Smrg      break;
22757ec681f3Smrg   case ac_image_2dmsaa:
22767ec681f3Smrg      dimname = "2dmsaa";
22777ec681f3Smrg      break;
22787ec681f3Smrg   case ac_image_2darraymsaa:
22797ec681f3Smrg      dimname = "2darraymsaa";
22807ec681f3Smrg      break;
22817ec681f3Smrg   default:
22827ec681f3Smrg      unreachable("invalid dim");
22837ec681f3Smrg   }
22847ec681f3Smrg
22857ec681f3Smrg   ac_build_type_name_for_intr(data_type, data_type_str, sizeof(data_type_str));
22867ec681f3Smrg
22877ec681f3Smrg   bool lod_suffix = a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
22887ec681f3Smrg   char intr_name[96];
22897ec681f3Smrg   snprintf(intr_name, sizeof(intr_name),
22907ec681f3Smrg            "llvm.amdgcn.image.%s%s" /* base name */
22917ec681f3Smrg            "%s%s%s%s"               /* sample/gather modifiers */
22927ec681f3Smrg            ".%s.%s%s%s%s",          /* dimension and type overloads */
22937ec681f3Smrg            name, atomic_subop, a->compare ? ".c" : "",
22947ec681f3Smrg            a->bias ? ".b" : lod_suffix ? ".l" : a->derivs[0] ? ".d" : a->level_zero ? ".lz" : "",
22957ec681f3Smrg            a->min_lod ? ".cl" : "", a->offset ? ".o" : "", dimname,
22967ec681f3Smrg            data_type_str, overload[0], overload[1], overload[2]);
22977ec681f3Smrg
22987ec681f3Smrg   LLVMTypeRef retty;
22997ec681f3Smrg   if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
23007ec681f3Smrg      retty = ctx->voidt;
23017ec681f3Smrg   else
23027ec681f3Smrg      retty = data_type;
23037ec681f3Smrg
23047ec681f3Smrg   LLVMValueRef result = ac_build_intrinsic(ctx, intr_name, retty, args, num_args, a->attributes);
23057ec681f3Smrg   if (a->tfe) {
23067ec681f3Smrg      LLVMValueRef texel = LLVMBuildExtractValue(ctx->builder, result, 0, "");
23077ec681f3Smrg      LLVMValueRef code = LLVMBuildExtractValue(ctx->builder, result, 1, "");
23087ec681f3Smrg      result = ac_build_concat(ctx, texel, ac_to_float(ctx, code));
23097ec681f3Smrg   }
23107ec681f3Smrg
23117ec681f3Smrg   if (!sample && !atomic && retty != ctx->voidt)
23127ec681f3Smrg      result = ac_to_integer(ctx, result);
23137ec681f3Smrg
23147ec681f3Smrg   return result;
23157ec681f3Smrg}
23167ec681f3Smrg
23177ec681f3SmrgLLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx, LLVMValueRef rsrc)
23187ec681f3Smrg{
23197ec681f3Smrg   LLVMValueRef samples;
23207ec681f3Smrg
23217ec681f3Smrg   /* Read the samples from the descriptor directly.
23227ec681f3Smrg    * Hardware doesn't have any instruction for this.
23237ec681f3Smrg    */
23247ec681f3Smrg   samples = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 3, 0), "");
23257ec681f3Smrg   samples = LLVMBuildLShr(ctx->builder, samples, LLVMConstInt(ctx->i32, 16, 0), "");
23267ec681f3Smrg   samples = LLVMBuildAnd(ctx->builder, samples, LLVMConstInt(ctx->i32, 0xf, 0), "");
23277ec681f3Smrg   samples = LLVMBuildShl(ctx->builder, ctx->i32_1, samples, "");
23287ec681f3Smrg   return samples;
23297ec681f3Smrg}
23307ec681f3Smrg
23317ec681f3SmrgLLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2])
23327ec681f3Smrg{
23337ec681f3Smrg   return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", ctx->v2f16, args, 2,
23347ec681f3Smrg                             AC_FUNC_ATTR_READNONE);
23357ec681f3Smrg}
23367ec681f3Smrg
23377ec681f3SmrgLLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2])
23387ec681f3Smrg{
23397ec681f3Smrg   LLVMValueRef res = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", ctx->v2i16, args, 2,
23407ec681f3Smrg                                         AC_FUNC_ATTR_READNONE);
23417ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
23427ec681f3Smrg}
23437ec681f3Smrg
23447ec681f3SmrgLLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2])
23457ec681f3Smrg{
23467ec681f3Smrg   LLVMValueRef res = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", ctx->v2i16, args, 2,
23477ec681f3Smrg                                         AC_FUNC_ATTR_READNONE);
23487ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
23497ec681f3Smrg}
23507ec681f3Smrg
23517ec681f3SmrgLLVMValueRef ac_build_cvt_pknorm_i16_f16(struct ac_llvm_context *ctx,
23527ec681f3Smrg                                         LLVMValueRef args[2])
23537ec681f3Smrg{
23547ec681f3Smrg   LLVMTypeRef param_types[] = {ctx->f16, ctx->f16};
23557ec681f3Smrg   LLVMTypeRef calltype = LLVMFunctionType(ctx->i32, param_types, 2, false);
23567ec681f3Smrg   LLVMValueRef code = LLVMConstInlineAsm(calltype,
23577ec681f3Smrg                                          "v_cvt_pknorm_i16_f16 $0, $1, $2", "=v,v,v",
23587ec681f3Smrg                                          false, false);
23597ec681f3Smrg   return LLVMBuildCall(ctx->builder, code, args, 2, "");
23607ec681f3Smrg}
23617ec681f3Smrg
23627ec681f3SmrgLLVMValueRef ac_build_cvt_pknorm_u16_f16(struct ac_llvm_context *ctx,
23637ec681f3Smrg                                         LLVMValueRef args[2])
23647ec681f3Smrg{
23657ec681f3Smrg   LLVMTypeRef param_types[] = {ctx->f16, ctx->f16};
23667ec681f3Smrg   LLVMTypeRef calltype = LLVMFunctionType(ctx->i32, param_types, 2, false);
23677ec681f3Smrg   LLVMValueRef code = LLVMConstInlineAsm(calltype,
23687ec681f3Smrg                                          "v_cvt_pknorm_u16_f16 $0, $1, $2", "=v,v,v",
23697ec681f3Smrg                                          false, false);
23707ec681f3Smrg   return LLVMBuildCall(ctx->builder, code, args, 2, "");
23717ec681f3Smrg}
23727ec681f3Smrg
23737ec681f3Smrg/* The 8-bit and 10-bit clamping is for HW workarounds. */
23747ec681f3SmrgLLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits,
23757ec681f3Smrg                                 bool hi)
23767ec681f3Smrg{
23777ec681f3Smrg   assert(bits == 8 || bits == 10 || bits == 16);
23787ec681f3Smrg
23797ec681f3Smrg   LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
23807ec681f3Smrg   LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
23817ec681f3Smrg   LLVMValueRef max_alpha = bits != 10 ? max_rgb : ctx->i32_1;
23827ec681f3Smrg   LLVMValueRef min_alpha = bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
23837ec681f3Smrg
23847ec681f3Smrg   /* Clamp. */
23857ec681f3Smrg   if (bits != 16) {
23867ec681f3Smrg      for (int i = 0; i < 2; i++) {
23877ec681f3Smrg         bool alpha = hi && i == 1;
23887ec681f3Smrg         args[i] = ac_build_imin(ctx, args[i], alpha ? max_alpha : max_rgb);
23897ec681f3Smrg         args[i] = ac_build_imax(ctx, args[i], alpha ? min_alpha : min_rgb);
23907ec681f3Smrg      }
23917ec681f3Smrg   }
23927ec681f3Smrg
23937ec681f3Smrg   LLVMValueRef res =
23947ec681f3Smrg      ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", ctx->v2i16, args, 2, AC_FUNC_ATTR_READNONE);
23957ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
23967ec681f3Smrg}
23977ec681f3Smrg
23987ec681f3Smrg/* The 8-bit and 10-bit clamping is for HW workarounds. */
23997ec681f3SmrgLLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits,
24007ec681f3Smrg                                 bool hi)
24017ec681f3Smrg{
24027ec681f3Smrg   assert(bits == 8 || bits == 10 || bits == 16);
24037ec681f3Smrg
24047ec681f3Smrg   LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
24057ec681f3Smrg   LLVMValueRef max_alpha = bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
24067ec681f3Smrg
24077ec681f3Smrg   /* Clamp. */
24087ec681f3Smrg   if (bits != 16) {
24097ec681f3Smrg      for (int i = 0; i < 2; i++) {
24107ec681f3Smrg         bool alpha = hi && i == 1;
24117ec681f3Smrg         args[i] = ac_build_umin(ctx, args[i], alpha ? max_alpha : max_rgb);
24127ec681f3Smrg      }
24137ec681f3Smrg   }
24147ec681f3Smrg
24157ec681f3Smrg   LLVMValueRef res =
24167ec681f3Smrg      ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", ctx->v2i16, args, 2, AC_FUNC_ATTR_READNONE);
24177ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
24187ec681f3Smrg}
24197ec681f3Smrg
24207ec681f3SmrgLLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
24217ec681f3Smrg{
24227ec681f3Smrg   return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, &i1, 1, AC_FUNC_ATTR_READNONE);
24237ec681f3Smrg}
24247ec681f3Smrg
24257ec681f3Smrgvoid ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
24267ec681f3Smrg{
24277ec681f3Smrg   ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, &i1, 1, 0);
24287ec681f3Smrg}
24297ec681f3Smrg
24307ec681f3SmrgLLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset,
24317ec681f3Smrg                          LLVMValueRef width, bool is_signed)
24327ec681f3Smrg{
24337ec681f3Smrg   LLVMValueRef args[] = {
24347ec681f3Smrg      input,
24357ec681f3Smrg      offset,
24367ec681f3Smrg      width,
24377ec681f3Smrg   };
24387ec681f3Smrg
24397ec681f3Smrg   return ac_build_intrinsic(ctx, is_signed ? "llvm.amdgcn.sbfe.i32" : "llvm.amdgcn.ubfe.i32",
24407ec681f3Smrg                             ctx->i32, args, 3, AC_FUNC_ATTR_READNONE);
24417ec681f3Smrg}
24427ec681f3Smrg
24437ec681f3SmrgLLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1,
24447ec681f3Smrg                           LLVMValueRef s2)
24457ec681f3Smrg{
24467ec681f3Smrg   return LLVMBuildAdd(ctx->builder, LLVMBuildMul(ctx->builder, s0, s1, ""), s2, "");
24477ec681f3Smrg}
24487ec681f3Smrg
24497ec681f3SmrgLLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1,
24507ec681f3Smrg                           LLVMValueRef s2)
24517ec681f3Smrg{
24527ec681f3Smrg   /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
24537ec681f3Smrg   if (ctx->chip_class >= GFX10) {
24547ec681f3Smrg      return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32, (LLVMValueRef[]){s0, s1, s2}, 3,
24557ec681f3Smrg                                AC_FUNC_ATTR_READNONE);
24567ec681f3Smrg   }
24577ec681f3Smrg
24587ec681f3Smrg   return LLVMBuildFAdd(ctx->builder, LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
24597ec681f3Smrg}
24607ec681f3Smrg
24617ec681f3Smrgvoid ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
24627ec681f3Smrg{
24637ec681f3Smrg   if (!wait_flags)
24647ec681f3Smrg      return;
24657ec681f3Smrg
24667ec681f3Smrg   unsigned lgkmcnt = 63;
24677ec681f3Smrg   unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15;
24687ec681f3Smrg   unsigned vscnt = 63;
24697ec681f3Smrg
24707ec681f3Smrg   if (wait_flags & AC_WAIT_LGKM)
24717ec681f3Smrg      lgkmcnt = 0;
24727ec681f3Smrg   if (wait_flags & AC_WAIT_VLOAD)
24737ec681f3Smrg      vmcnt = 0;
24747ec681f3Smrg
24757ec681f3Smrg   if (wait_flags & AC_WAIT_VSTORE) {
24767ec681f3Smrg      if (ctx->chip_class >= GFX10)
24777ec681f3Smrg         vscnt = 0;
24787ec681f3Smrg      else
24797ec681f3Smrg         vmcnt = 0;
24807ec681f3Smrg   }
24817ec681f3Smrg
24827ec681f3Smrg   /* There is no intrinsic for vscnt(0), so use a fence. */
24837ec681f3Smrg   if ((wait_flags & AC_WAIT_LGKM && wait_flags & AC_WAIT_VLOAD && wait_flags & AC_WAIT_VSTORE) ||
24847ec681f3Smrg       vscnt == 0) {
24857ec681f3Smrg      LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, "");
24867ec681f3Smrg      return;
24877ec681f3Smrg   }
24887ec681f3Smrg
24897ec681f3Smrg   unsigned simm16 = (lgkmcnt << 8) | (7 << 4) | /* expcnt */
24907ec681f3Smrg                     (vmcnt & 0xf) | ((vmcnt >> 4) << 14);
24917ec681f3Smrg
24927ec681f3Smrg   LLVMValueRef args[1] = {
24937ec681f3Smrg      LLVMConstInt(ctx->i32, simm16, false),
24947ec681f3Smrg   };
24957ec681f3Smrg   ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0);
24967ec681f3Smrg}
24977ec681f3Smrg
24987ec681f3SmrgLLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
24997ec681f3Smrg                           LLVMTypeRef type)
25007ec681f3Smrg{
25017ec681f3Smrg   unsigned bitsize = ac_get_elem_bits(ctx, type);
25027ec681f3Smrg   LLVMValueRef zero = LLVMConstReal(type, 0.0);
25037ec681f3Smrg   LLVMValueRef one = LLVMConstReal(type, 1.0);
25047ec681f3Smrg   LLVMValueRef result;
25057ec681f3Smrg
25067ec681f3Smrg   if (bitsize == 64 || (bitsize == 16 && ctx->chip_class <= GFX8) || type == ctx->v2f16) {
25077ec681f3Smrg      /* Use fmin/fmax for 64-bit fsat or 16-bit on GFX6-GFX8 because LLVM
25087ec681f3Smrg       * doesn't expose an intrinsic.
25097ec681f3Smrg       */
25107ec681f3Smrg      result = ac_build_fmin(ctx, ac_build_fmax(ctx, src, zero), one);
25117ec681f3Smrg   } else {
25127ec681f3Smrg      LLVMTypeRef type;
25137ec681f3Smrg      char *intr;
25147ec681f3Smrg
25157ec681f3Smrg      if (bitsize == 16) {
25167ec681f3Smrg         intr = "llvm.amdgcn.fmed3.f16";
25177ec681f3Smrg         type = ctx->f16;
25187ec681f3Smrg      } else {
25197ec681f3Smrg         assert(bitsize == 32);
25207ec681f3Smrg         intr = "llvm.amdgcn.fmed3.f32";
25217ec681f3Smrg         type = ctx->f32;
25227ec681f3Smrg      }
25237ec681f3Smrg
25247ec681f3Smrg      LLVMValueRef params[] = {
25257ec681f3Smrg         zero,
25267ec681f3Smrg         one,
25277ec681f3Smrg         src,
25287ec681f3Smrg      };
25297ec681f3Smrg
25307ec681f3Smrg      result = ac_build_intrinsic(ctx, intr, type, params, 3,
25317ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
25327ec681f3Smrg   }
25337ec681f3Smrg
25347ec681f3Smrg   if (ctx->chip_class < GFX9 && bitsize == 32) {
25357ec681f3Smrg      /* Only pre-GFX9 chips do not flush denorms. */
25367ec681f3Smrg      result = ac_build_canonicalize(ctx, result, bitsize);
25377ec681f3Smrg   }
25387ec681f3Smrg
25397ec681f3Smrg   return result;
25407ec681f3Smrg}
25417ec681f3Smrg
25427ec681f3SmrgLLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
25437ec681f3Smrg{
25447ec681f3Smrg   LLVMTypeRef type;
25457ec681f3Smrg   char *intr;
25467ec681f3Smrg
25477ec681f3Smrg   if (bitsize == 16) {
25487ec681f3Smrg      intr = "llvm.amdgcn.fract.f16";
25497ec681f3Smrg      type = ctx->f16;
25507ec681f3Smrg   } else if (bitsize == 32) {
25517ec681f3Smrg      intr = "llvm.amdgcn.fract.f32";
25527ec681f3Smrg      type = ctx->f32;
25537ec681f3Smrg   } else {
25547ec681f3Smrg      intr = "llvm.amdgcn.fract.f64";
25557ec681f3Smrg      type = ctx->f64;
25567ec681f3Smrg   }
25577ec681f3Smrg
25587ec681f3Smrg   LLVMValueRef params[] = {
25597ec681f3Smrg      src0,
25607ec681f3Smrg   };
25617ec681f3Smrg   return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
25627ec681f3Smrg}
25637ec681f3Smrg
25647ec681f3SmrgLLVMValueRef ac_const_uint_vec(struct ac_llvm_context *ctx, LLVMTypeRef type, uint64_t value)
25657ec681f3Smrg{
25667ec681f3Smrg
25677ec681f3Smrg   if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
25687ec681f3Smrg      LLVMValueRef scalar = LLVMConstInt(LLVMGetElementType(type), value, 0);
25697ec681f3Smrg      unsigned vec_size = LLVMGetVectorSize(type);
25707ec681f3Smrg      LLVMValueRef *scalars = alloca(vec_size * sizeof(LLVMValueRef));
25717ec681f3Smrg
25727ec681f3Smrg      for (unsigned i = 0; i < vec_size; i++)
25737ec681f3Smrg         scalars[i] = scalar;
25747ec681f3Smrg      return LLVMConstVector(scalars, vec_size);
25757ec681f3Smrg   }
25767ec681f3Smrg   return LLVMConstInt(type, value, 0);
25777ec681f3Smrg}
25787ec681f3Smrg
25797ec681f3SmrgLLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0)
25807ec681f3Smrg{
25817ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(src0);
25827ec681f3Smrg   LLVMValueRef val;
25837ec681f3Smrg
25847ec681f3Smrg   /* v_med3 is selected only when max is first. (LLVM bug?) */
25857ec681f3Smrg   val = ac_build_imax(ctx, src0, ac_const_uint_vec(ctx, type, -1));
25867ec681f3Smrg   return ac_build_imin(ctx, val, ac_const_uint_vec(ctx, type, 1));
25877ec681f3Smrg}
25887ec681f3Smrg
25897ec681f3Smrgstatic LLVMValueRef ac_eliminate_negative_zero(struct ac_llvm_context *ctx, LLVMValueRef val)
25907ec681f3Smrg{
25917ec681f3Smrg   ac_enable_signed_zeros(ctx);
25927ec681f3Smrg   /* (val + 0) converts negative zero to positive zero. */
25937ec681f3Smrg   val = LLVMBuildFAdd(ctx->builder, val, LLVMConstNull(LLVMTypeOf(val)), "");
25947ec681f3Smrg   ac_disable_signed_zeros(ctx);
25957ec681f3Smrg   return val;
25967ec681f3Smrg}
25977ec681f3Smrg
25987ec681f3SmrgLLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src)
25997ec681f3Smrg{
26007ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(src);
26017ec681f3Smrg   LLVMValueRef pos, neg, dw[2], val;
26027ec681f3Smrg   unsigned bitsize = ac_get_elem_bits(ctx, type);
26037ec681f3Smrg
26047ec681f3Smrg   /* The standard version leads to this:
26057ec681f3Smrg    *   v_cmp_ngt_f32_e64 s[0:1], s4, 0                       ; D40B0000 00010004
26067ec681f3Smrg    *   v_cndmask_b32_e64 v4, 1.0, s4, s[0:1]                 ; D5010004 000008F2
26077ec681f3Smrg    *   v_cmp_le_f32_e32 vcc, 0, v4                           ; 7C060880
26087ec681f3Smrg    *   v_cndmask_b32_e32 v4, -1.0, v4, vcc                   ; 020808F3
26097ec681f3Smrg    *
26107ec681f3Smrg    * The isign version:
26117ec681f3Smrg    *   v_add_f32_e64 v4, s4, 0                               ; D5030004 00010004
26127ec681f3Smrg    *   v_med3_i32 v4, v4, -1, 1                              ; D5580004 02058304
26137ec681f3Smrg    *   v_cvt_f32_i32_e32 v4, v4                              ; 7E080B04
26147ec681f3Smrg    *
26157ec681f3Smrg    * (src0 + 0) converts negative zero to positive zero.
26167ec681f3Smrg    * After that, int(fsign(x)) == isign(floatBitsToInt(x)).
26177ec681f3Smrg    *
26187ec681f3Smrg    * For FP64, use the standard version, which doesn't suffer from the huge DP rate
26197ec681f3Smrg    * reduction. (FP64 comparisons are as fast as int64 comparisons)
26207ec681f3Smrg    */
26217ec681f3Smrg   if (bitsize == 16 || bitsize == 32) {
26227ec681f3Smrg      val = ac_to_integer(ctx, ac_eliminate_negative_zero(ctx, src));
26237ec681f3Smrg      val = ac_build_isign(ctx, val);
26247ec681f3Smrg      return LLVMBuildSIToFP(ctx->builder, val, type, "");
26257ec681f3Smrg   }
26267ec681f3Smrg
26277ec681f3Smrg   assert(bitsize == 64);
26287ec681f3Smrg   pos = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src, ctx->f64_0, "");
26297ec681f3Smrg   neg = LLVMBuildFCmp(ctx->builder, LLVMRealOLT, src, ctx->f64_0, "");
26307ec681f3Smrg   dw[0] = ctx->i32_0;
26317ec681f3Smrg   dw[1] = LLVMBuildSelect(
26327ec681f3Smrg      ctx->builder, pos, LLVMConstInt(ctx->i32, 0x3FF00000, 0),
26337ec681f3Smrg      LLVMBuildSelect(ctx->builder, neg, LLVMConstInt(ctx->i32, 0xBFF00000, 0), ctx->i32_0, ""),
26347ec681f3Smrg      "");
26357ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, ac_build_gather_values(ctx, dw, 2), ctx->f64, "");
26367ec681f3Smrg}
26377ec681f3Smrg
26387ec681f3SmrgLLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
26397ec681f3Smrg{
26407ec681f3Smrg   LLVMValueRef result;
26417ec681f3Smrg   unsigned bitsize;
26427ec681f3Smrg
26437ec681f3Smrg   bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
26447ec681f3Smrg
26457ec681f3Smrg   switch (bitsize) {
26467ec681f3Smrg   case 128:
26477ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.ctpop.i128", ctx->i128, (LLVMValueRef[]){src0}, 1,
26487ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
26497ec681f3Smrg      result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
26507ec681f3Smrg      break;
26517ec681f3Smrg   case 64:
26527ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64, (LLVMValueRef[]){src0}, 1,
26537ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
26547ec681f3Smrg
26557ec681f3Smrg      result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
26567ec681f3Smrg      break;
26577ec681f3Smrg   case 32:
26587ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, (LLVMValueRef[]){src0}, 1,
26597ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
26607ec681f3Smrg      break;
26617ec681f3Smrg   case 16:
26627ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16, (LLVMValueRef[]){src0}, 1,
26637ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
26647ec681f3Smrg
26657ec681f3Smrg      result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
26667ec681f3Smrg      break;
26677ec681f3Smrg   case 8:
26687ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8, (LLVMValueRef[]){src0}, 1,
26697ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
26707ec681f3Smrg
26717ec681f3Smrg      result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
26727ec681f3Smrg      break;
26737ec681f3Smrg   default:
26747ec681f3Smrg      unreachable(!"invalid bitsize");
26757ec681f3Smrg      break;
26767ec681f3Smrg   }
26777ec681f3Smrg
26787ec681f3Smrg   return result;
26797ec681f3Smrg}
26807ec681f3Smrg
26817ec681f3SmrgLLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0)
26827ec681f3Smrg{
26837ec681f3Smrg   LLVMValueRef result;
26847ec681f3Smrg   unsigned bitsize;
26857ec681f3Smrg
26867ec681f3Smrg   bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
26877ec681f3Smrg
26887ec681f3Smrg   switch (bitsize) {
26897ec681f3Smrg   case 64:
26907ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64, (LLVMValueRef[]){src0}, 1,
26917ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
26927ec681f3Smrg
26937ec681f3Smrg      result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
26947ec681f3Smrg      break;
26957ec681f3Smrg   case 32:
26967ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, (LLVMValueRef[]){src0}, 1,
26977ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
26987ec681f3Smrg      break;
26997ec681f3Smrg   case 16:
27007ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16, (LLVMValueRef[]){src0}, 1,
27017ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
27027ec681f3Smrg
27037ec681f3Smrg      result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
27047ec681f3Smrg      break;
27057ec681f3Smrg   case 8:
27067ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8, (LLVMValueRef[]){src0}, 1,
27077ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
27087ec681f3Smrg
27097ec681f3Smrg      result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
27107ec681f3Smrg      break;
27117ec681f3Smrg   default:
27127ec681f3Smrg      unreachable(!"invalid bitsize");
27137ec681f3Smrg      break;
27147ec681f3Smrg   }
27157ec681f3Smrg
27167ec681f3Smrg   return result;
27177ec681f3Smrg}
27187ec681f3Smrg
/* Operand indices of an export call instruction, as used with
 * LLVMGetOperand/LLVMSetOperand by the VS output optimization below.
 */
#define AC_EXP_TARGET           0
#define AC_EXP_ENABLED_CHANNELS 1
#define AC_EXP_OUT0             2 /* first of four consecutive channel operands */

/* Classification of one exported channel value. */
enum ac_ir_type
{
   AC_IR_UNDEF, /* LLVM undef */
   AC_IR_CONST, /* floating-point constant; its value is kept in const_float */
   AC_IR_VALUE, /* any other value */
};

/* One channel of a parsed export instruction. */
struct ac_vs_exp_chan {
   LLVMValueRef value; /* the operand itself */
   float const_float;  /* constant value, set when type == AC_IR_CONST */
   enum ac_ir_type type;
};

/* A parsed PARAM export instruction. */
struct ac_vs_exp_inst {
   unsigned offset;   /* export target relative to V_008DFC_SQ_EXP_PARAM */
   LLVMValueRef inst; /* the export call instruction */
   struct ac_vs_exp_chan chan[4];
};

/* List of the PARAM exports that have been kept so far. */
struct ac_vs_exports {
   unsigned num; /* number of valid entries in exp[] */
   struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
};
27467ec681f3Smrg
27477ec681f3Smrg/* Return true if the PARAM export has been eliminated. */
27487ec681f3Smrgstatic bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, uint32_t num_outputs,
27497ec681f3Smrg                                      struct ac_vs_exp_inst *exp)
27507ec681f3Smrg{
27517ec681f3Smrg   unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
27527ec681f3Smrg   bool is_zero[4] = {0}, is_one[4] = {0};
27537ec681f3Smrg
27547ec681f3Smrg   for (i = 0; i < 4; i++) {
27557ec681f3Smrg      /* It's a constant expression. Undef outputs are eliminated too. */
27567ec681f3Smrg      if (exp->chan[i].type == AC_IR_UNDEF) {
27577ec681f3Smrg         is_zero[i] = true;
27587ec681f3Smrg         is_one[i] = true;
27597ec681f3Smrg      } else if (exp->chan[i].type == AC_IR_CONST) {
27607ec681f3Smrg         if (exp->chan[i].const_float == 0)
27617ec681f3Smrg            is_zero[i] = true;
27627ec681f3Smrg         else if (exp->chan[i].const_float == 1)
27637ec681f3Smrg            is_one[i] = true;
27647ec681f3Smrg         else
27657ec681f3Smrg            return false; /* other constant */
27667ec681f3Smrg      } else
27677ec681f3Smrg         return false;
27687ec681f3Smrg   }
27697ec681f3Smrg
27707ec681f3Smrg   /* Only certain combinations of 0 and 1 can be eliminated. */
27717ec681f3Smrg   if (is_zero[0] && is_zero[1] && is_zero[2])
27727ec681f3Smrg      default_val = is_zero[3] ? 0 : 1;
27737ec681f3Smrg   else if (is_one[0] && is_one[1] && is_one[2])
27747ec681f3Smrg      default_val = is_zero[3] ? 2 : 3;
27757ec681f3Smrg   else
27767ec681f3Smrg      return false;
27777ec681f3Smrg
27787ec681f3Smrg   /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
27797ec681f3Smrg   LLVMInstructionEraseFromParent(exp->inst);
27807ec681f3Smrg
27817ec681f3Smrg   /* Change OFFSET to DEFAULT_VAL. */
27827ec681f3Smrg   for (i = 0; i < num_outputs; i++) {
27837ec681f3Smrg      if (vs_output_param_offset[i] == exp->offset) {
27847ec681f3Smrg         vs_output_param_offset[i] = AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
27857ec681f3Smrg         break;
27867ec681f3Smrg      }
27877ec681f3Smrg   }
27887ec681f3Smrg   return true;
27897ec681f3Smrg}
27907ec681f3Smrg
27917ec681f3Smrgstatic bool ac_eliminate_duplicated_output(struct ac_llvm_context *ctx,
27927ec681f3Smrg                                           uint8_t *vs_output_param_offset, uint32_t num_outputs,
27937ec681f3Smrg                                           struct ac_vs_exports *processed,
27947ec681f3Smrg                                           struct ac_vs_exp_inst *exp)
27957ec681f3Smrg{
27967ec681f3Smrg   unsigned p, copy_back_channels = 0;
27977ec681f3Smrg
27987ec681f3Smrg   /* See if the output is already in the list of processed outputs.
27997ec681f3Smrg    * The LLVMValueRef comparison relies on SSA.
28007ec681f3Smrg    */
28017ec681f3Smrg   for (p = 0; p < processed->num; p++) {
28027ec681f3Smrg      bool different = false;
28037ec681f3Smrg
28047ec681f3Smrg      for (unsigned j = 0; j < 4; j++) {
28057ec681f3Smrg         struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
28067ec681f3Smrg         struct ac_vs_exp_chan *c2 = &exp->chan[j];
28077ec681f3Smrg
28087ec681f3Smrg         /* Treat undef as a match. */
28097ec681f3Smrg         if (c2->type == AC_IR_UNDEF)
28107ec681f3Smrg            continue;
28117ec681f3Smrg
28127ec681f3Smrg         /* If c1 is undef but c2 isn't, we can copy c2 to c1
28137ec681f3Smrg          * and consider the instruction duplicated.
28147ec681f3Smrg          */
28157ec681f3Smrg         if (c1->type == AC_IR_UNDEF) {
28167ec681f3Smrg            copy_back_channels |= 1 << j;
28177ec681f3Smrg            continue;
28187ec681f3Smrg         }
28197ec681f3Smrg
28207ec681f3Smrg         /* Test whether the channels are not equal. */
28217ec681f3Smrg         if (c1->type != c2->type ||
28227ec681f3Smrg             (c1->type == AC_IR_CONST && c1->const_float != c2->const_float) ||
28237ec681f3Smrg             (c1->type == AC_IR_VALUE && c1->value != c2->value)) {
28247ec681f3Smrg            different = true;
28257ec681f3Smrg            break;
28267ec681f3Smrg         }
28277ec681f3Smrg      }
28287ec681f3Smrg      if (!different)
28297ec681f3Smrg         break;
28307ec681f3Smrg
28317ec681f3Smrg      copy_back_channels = 0;
28327ec681f3Smrg   }
28337ec681f3Smrg   if (p == processed->num)
28347ec681f3Smrg      return false;
28357ec681f3Smrg
28367ec681f3Smrg   /* If a match was found, but the matching export has undef where the new
28377ec681f3Smrg    * one has a normal value, copy the normal value to the undef channel.
28387ec681f3Smrg    */
28397ec681f3Smrg   struct ac_vs_exp_inst *match = &processed->exp[p];
28407ec681f3Smrg
28417ec681f3Smrg   /* Get current enabled channels mask. */
28427ec681f3Smrg   LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS);
28437ec681f3Smrg   unsigned enabled_channels = LLVMConstIntGetZExtValue(arg);
28447ec681f3Smrg
28457ec681f3Smrg   while (copy_back_channels) {
28467ec681f3Smrg      unsigned chan = u_bit_scan(&copy_back_channels);
28477ec681f3Smrg
28487ec681f3Smrg      assert(match->chan[chan].type == AC_IR_UNDEF);
28497ec681f3Smrg      LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan, exp->chan[chan].value);
28507ec681f3Smrg      match->chan[chan] = exp->chan[chan];
28517ec681f3Smrg
28527ec681f3Smrg      /* Update number of enabled channels because the original mask
28537ec681f3Smrg       * is not always 0xf.
28547ec681f3Smrg       */
28557ec681f3Smrg      enabled_channels |= (1 << chan);
28567ec681f3Smrg      LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS,
28577ec681f3Smrg                     LLVMConstInt(ctx->i32, enabled_channels, 0));
28587ec681f3Smrg   }
28597ec681f3Smrg
28607ec681f3Smrg   /* The PARAM export is duplicated. Kill it. */
28617ec681f3Smrg   LLVMInstructionEraseFromParent(exp->inst);
28627ec681f3Smrg
28637ec681f3Smrg   /* Change OFFSET to the matching export. */
28647ec681f3Smrg   for (unsigned i = 0; i < num_outputs; i++) {
28657ec681f3Smrg      if (vs_output_param_offset[i] == exp->offset) {
28667ec681f3Smrg         vs_output_param_offset[i] = match->offset;
28677ec681f3Smrg         break;
28687ec681f3Smrg      }
28697ec681f3Smrg   }
28707ec681f3Smrg   return true;
28717ec681f3Smrg}
28727ec681f3Smrg
28737ec681f3Smrgvoid ac_optimize_vs_outputs(struct ac_llvm_context *ctx, LLVMValueRef main_fn,
28747ec681f3Smrg                            uint8_t *vs_output_param_offset, uint32_t num_outputs,
28757ec681f3Smrg                            uint32_t skip_output_mask, uint8_t *num_param_exports)
28767ec681f3Smrg{
28777ec681f3Smrg   LLVMBasicBlockRef bb;
28787ec681f3Smrg   bool removed_any = false;
28797ec681f3Smrg   struct ac_vs_exports exports;
28807ec681f3Smrg
28817ec681f3Smrg   exports.num = 0;
28827ec681f3Smrg
28837ec681f3Smrg   /* Process all LLVM instructions. */
28847ec681f3Smrg   bb = LLVMGetFirstBasicBlock(main_fn);
28857ec681f3Smrg   while (bb) {
28867ec681f3Smrg      LLVMValueRef inst = LLVMGetFirstInstruction(bb);
28877ec681f3Smrg
28887ec681f3Smrg      while (inst) {
28897ec681f3Smrg         LLVMValueRef cur = inst;
28907ec681f3Smrg         inst = LLVMGetNextInstruction(inst);
28917ec681f3Smrg         struct ac_vs_exp_inst exp;
28927ec681f3Smrg
28937ec681f3Smrg         if (LLVMGetInstructionOpcode(cur) != LLVMCall)
28947ec681f3Smrg            continue;
28957ec681f3Smrg
28967ec681f3Smrg         LLVMValueRef callee = ac_llvm_get_called_value(cur);
28977ec681f3Smrg
28987ec681f3Smrg         if (!ac_llvm_is_function(callee))
28997ec681f3Smrg            continue;
29007ec681f3Smrg
29017ec681f3Smrg         const char *name = LLVMGetValueName(callee);
29027ec681f3Smrg         unsigned num_args = LLVMCountParams(callee);
29037ec681f3Smrg
29047ec681f3Smrg         /* Check if this is an export instruction. */
29057ec681f3Smrg         if ((num_args != 9 && num_args != 8) ||
29067ec681f3Smrg             (strcmp(name, "llvm.SI.export") && strcmp(name, "llvm.amdgcn.exp.f32")))
29077ec681f3Smrg            continue;
29087ec681f3Smrg
29097ec681f3Smrg         LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
29107ec681f3Smrg         unsigned target = LLVMConstIntGetZExtValue(arg);
29117ec681f3Smrg
29127ec681f3Smrg         if (target < V_008DFC_SQ_EXP_PARAM)
29137ec681f3Smrg            continue;
29147ec681f3Smrg
29157ec681f3Smrg         target -= V_008DFC_SQ_EXP_PARAM;
29167ec681f3Smrg
29177ec681f3Smrg         /* Parse the instruction. */
29187ec681f3Smrg         memset(&exp, 0, sizeof(exp));
29197ec681f3Smrg         exp.offset = target;
29207ec681f3Smrg         exp.inst = cur;
29217ec681f3Smrg
29227ec681f3Smrg         for (unsigned i = 0; i < 4; i++) {
29237ec681f3Smrg            LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);
29247ec681f3Smrg
29257ec681f3Smrg            exp.chan[i].value = v;
29267ec681f3Smrg
29277ec681f3Smrg            if (LLVMIsUndef(v)) {
29287ec681f3Smrg               exp.chan[i].type = AC_IR_UNDEF;
29297ec681f3Smrg            } else if (LLVMIsAConstantFP(v)) {
29307ec681f3Smrg               LLVMBool loses_info;
29317ec681f3Smrg               exp.chan[i].type = AC_IR_CONST;
29327ec681f3Smrg               exp.chan[i].const_float = LLVMConstRealGetDouble(v, &loses_info);
29337ec681f3Smrg            } else {
29347ec681f3Smrg               exp.chan[i].type = AC_IR_VALUE;
29357ec681f3Smrg            }
29367ec681f3Smrg         }
29377ec681f3Smrg
29387ec681f3Smrg         /* Eliminate constant and duplicated PARAM exports. */
29397ec681f3Smrg         if (!((1u << target) & skip_output_mask) &&
29407ec681f3Smrg             (ac_eliminate_const_output(vs_output_param_offset, num_outputs, &exp) ||
29417ec681f3Smrg              ac_eliminate_duplicated_output(ctx, vs_output_param_offset, num_outputs, &exports,
29427ec681f3Smrg                                             &exp))) {
29437ec681f3Smrg            removed_any = true;
29447ec681f3Smrg         } else {
29457ec681f3Smrg            exports.exp[exports.num++] = exp;
29467ec681f3Smrg         }
29477ec681f3Smrg      }
29487ec681f3Smrg      bb = LLVMGetNextBasicBlock(bb);
29497ec681f3Smrg   }
29507ec681f3Smrg
29517ec681f3Smrg   /* Remove holes in export memory due to removed PARAM exports.
29527ec681f3Smrg    * This is done by renumbering all PARAM exports.
29537ec681f3Smrg    */
29547ec681f3Smrg   if (removed_any) {
29557ec681f3Smrg      uint8_t old_offset[VARYING_SLOT_MAX];
29567ec681f3Smrg      unsigned out, i;
29577ec681f3Smrg
29587ec681f3Smrg      /* Make a copy of the offsets. We need the old version while
29597ec681f3Smrg       * we are modifying some of them. */
29607ec681f3Smrg      memcpy(old_offset, vs_output_param_offset, sizeof(old_offset));
29617ec681f3Smrg
29627ec681f3Smrg      for (i = 0; i < exports.num; i++) {
29637ec681f3Smrg         unsigned offset = exports.exp[i].offset;
29647ec681f3Smrg
29657ec681f3Smrg         /* Update vs_output_param_offset. Multiple outputs can
29667ec681f3Smrg          * have the same offset.
29677ec681f3Smrg          */
29687ec681f3Smrg         for (out = 0; out < num_outputs; out++) {
29697ec681f3Smrg            if (old_offset[out] == offset)
29707ec681f3Smrg               vs_output_param_offset[out] = i;
29717ec681f3Smrg         }
29727ec681f3Smrg
29737ec681f3Smrg         /* Change the PARAM offset in the instruction. */
29747ec681f3Smrg         LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
29757ec681f3Smrg                        LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_PARAM + i, 0));
29767ec681f3Smrg      }
29777ec681f3Smrg      *num_param_exports = exports.num;
29787ec681f3Smrg   }
29797ec681f3Smrg}
29807ec681f3Smrg
29817ec681f3Smrgvoid ac_init_exec_full_mask(struct ac_llvm_context *ctx)
29827ec681f3Smrg{
29837ec681f3Smrg   LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
29847ec681f3Smrg   ac_build_intrinsic(ctx, "llvm.amdgcn.init.exec", ctx->voidt, &full_mask, 1,
29857ec681f3Smrg                      AC_FUNC_ATTR_CONVERGENT);
29867ec681f3Smrg}
29877ec681f3Smrg
29887ec681f3Smrgvoid ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
29897ec681f3Smrg{
29907ec681f3Smrg   unsigned lds_size = ctx->chip_class >= GFX7 ? 65536 : 32768;
29917ec681f3Smrg   ctx->lds = LLVMBuildIntToPtr(
29927ec681f3Smrg      ctx->builder, ctx->i32_0,
29937ec681f3Smrg      LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS), "lds");
29947ec681f3Smrg}
29957ec681f3Smrg
29967ec681f3SmrgLLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, LLVMValueRef dw_addr)
29977ec681f3Smrg{
29987ec681f3Smrg   return LLVMBuildLoad(ctx->builder, ac_build_gep0(ctx, ctx->lds, dw_addr), "");
29997ec681f3Smrg}
30007ec681f3Smrg
30017ec681f3Smrgvoid ac_lds_store(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value)
30027ec681f3Smrg{
30037ec681f3Smrg   value = ac_to_integer(ctx, value);
30047ec681f3Smrg   ac_build_indexed_store(ctx, ctx->lds, dw_addr, value);
30057ec681f3Smrg}
30067ec681f3Smrg
30077ec681f3SmrgLLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0)
30087ec681f3Smrg{
30097ec681f3Smrg   unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
30107ec681f3Smrg   const char *intrin_name;
30117ec681f3Smrg   LLVMTypeRef type;
30127ec681f3Smrg   LLVMValueRef zero;
30137ec681f3Smrg
30147ec681f3Smrg   switch (src0_bitsize) {
30157ec681f3Smrg   case 64:
30167ec681f3Smrg      intrin_name = "llvm.cttz.i64";
30177ec681f3Smrg      type = ctx->i64;
30187ec681f3Smrg      zero = ctx->i64_0;
30197ec681f3Smrg      break;
30207ec681f3Smrg   case 32:
30217ec681f3Smrg      intrin_name = "llvm.cttz.i32";
30227ec681f3Smrg      type = ctx->i32;
30237ec681f3Smrg      zero = ctx->i32_0;
30247ec681f3Smrg      break;
30257ec681f3Smrg   case 16:
30267ec681f3Smrg      intrin_name = "llvm.cttz.i16";
30277ec681f3Smrg      type = ctx->i16;
30287ec681f3Smrg      zero = ctx->i16_0;
30297ec681f3Smrg      break;
30307ec681f3Smrg   case 8:
30317ec681f3Smrg      intrin_name = "llvm.cttz.i8";
30327ec681f3Smrg      type = ctx->i8;
30337ec681f3Smrg      zero = ctx->i8_0;
30347ec681f3Smrg      break;
30357ec681f3Smrg   default:
30367ec681f3Smrg      unreachable(!"invalid bitsize");
30377ec681f3Smrg   }
30387ec681f3Smrg
30397ec681f3Smrg   LLVMValueRef params[2] = {
30407ec681f3Smrg      src0,
30417ec681f3Smrg
30427ec681f3Smrg      /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
30437ec681f3Smrg       * add special code to check for x=0. The reason is that
30447ec681f3Smrg       * the LLVM behavior for x=0 is different from what we
30457ec681f3Smrg       * need here. However, LLVM also assumes that ffs(x) is
30467ec681f3Smrg       * in [0, 31], but GLSL expects that ffs(0) = -1, so
30477ec681f3Smrg       * a conditional assignment to handle 0 is still required.
30487ec681f3Smrg       *
30497ec681f3Smrg       * The hardware already implements the correct behavior.
30507ec681f3Smrg       */
30517ec681f3Smrg      ctx->i1true,
30527ec681f3Smrg   };
30537ec681f3Smrg
30547ec681f3Smrg   LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE);
30557ec681f3Smrg
30567ec681f3Smrg   if (src0_bitsize == 64) {
30577ec681f3Smrg      lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
30587ec681f3Smrg   } else if (src0_bitsize < 32) {
30597ec681f3Smrg      lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, "");
30607ec681f3Smrg   }
30617ec681f3Smrg
30627ec681f3Smrg   /* TODO: We need an intrinsic to skip this conditional. */
30637ec681f3Smrg   /* Check for zero: */
30647ec681f3Smrg   return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, zero, ""),
30657ec681f3Smrg                          LLVMConstInt(ctx->i32, -1, 0), lsb, "");
30667ec681f3Smrg}
30677ec681f3Smrg
30687ec681f3SmrgLLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
30697ec681f3Smrg{
30707ec681f3Smrg   return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST);
30717ec681f3Smrg}
30727ec681f3Smrg
30737ec681f3SmrgLLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
30747ec681f3Smrg{
30757ec681f3Smrg   return LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST_32BIT);
30767ec681f3Smrg}
30777ec681f3Smrg
30787ec681f3Smrgstatic struct ac_llvm_flow *get_current_flow(struct ac_llvm_context *ctx)
30797ec681f3Smrg{
30807ec681f3Smrg   if (ctx->flow->depth > 0)
30817ec681f3Smrg      return &ctx->flow->stack[ctx->flow->depth - 1];
30827ec681f3Smrg   return NULL;
30837ec681f3Smrg}
30847ec681f3Smrg
30857ec681f3Smrgstatic struct ac_llvm_flow *get_innermost_loop(struct ac_llvm_context *ctx)
30867ec681f3Smrg{
30877ec681f3Smrg   for (unsigned i = ctx->flow->depth; i > 0; --i) {
30887ec681f3Smrg      if (ctx->flow->stack[i - 1].loop_entry_block)
30897ec681f3Smrg         return &ctx->flow->stack[i - 1];
30907ec681f3Smrg   }
30917ec681f3Smrg   return NULL;
30927ec681f3Smrg}
30937ec681f3Smrg
30947ec681f3Smrgstatic struct ac_llvm_flow *push_flow(struct ac_llvm_context *ctx)
30957ec681f3Smrg{
30967ec681f3Smrg   struct ac_llvm_flow *flow;
30977ec681f3Smrg
30987ec681f3Smrg   if (ctx->flow->depth >= ctx->flow->depth_max) {
30997ec681f3Smrg      unsigned new_max = MAX2(ctx->flow->depth << 1, AC_LLVM_INITIAL_CF_DEPTH);
31007ec681f3Smrg
31017ec681f3Smrg      ctx->flow->stack = realloc(ctx->flow->stack, new_max * sizeof(*ctx->flow->stack));
31027ec681f3Smrg      ctx->flow->depth_max = new_max;
31037ec681f3Smrg   }
31047ec681f3Smrg
31057ec681f3Smrg   flow = &ctx->flow->stack[ctx->flow->depth];
31067ec681f3Smrg   ctx->flow->depth++;
31077ec681f3Smrg
31087ec681f3Smrg   flow->next_block = NULL;
31097ec681f3Smrg   flow->loop_entry_block = NULL;
31107ec681f3Smrg   return flow;
31117ec681f3Smrg}
31127ec681f3Smrg
31137ec681f3Smrgstatic void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int label_id)
31147ec681f3Smrg{
31157ec681f3Smrg   char buf[32];
31167ec681f3Smrg   snprintf(buf, sizeof(buf), "%s%d", base, label_id);
31177ec681f3Smrg   LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
31187ec681f3Smrg}
31197ec681f3Smrg
31207ec681f3Smrg/* Append a basic block at the level of the parent flow.
31217ec681f3Smrg */
31227ec681f3Smrgstatic LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx, const char *name)
31237ec681f3Smrg{
31247ec681f3Smrg   assert(ctx->flow->depth >= 1);
31257ec681f3Smrg
31267ec681f3Smrg   if (ctx->flow->depth >= 2) {
31277ec681f3Smrg      struct ac_llvm_flow *flow = &ctx->flow->stack[ctx->flow->depth - 2];
31287ec681f3Smrg
31297ec681f3Smrg      return LLVMInsertBasicBlockInContext(ctx->context, flow->next_block, name);
31307ec681f3Smrg   }
31317ec681f3Smrg
31327ec681f3Smrg   LLVMValueRef main_fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder));
31337ec681f3Smrg   return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name);
31347ec681f3Smrg}
31357ec681f3Smrg
31367ec681f3Smrg/* Emit a branch to the given default target for the current block if
31377ec681f3Smrg * applicable -- that is, if the current block does not already contain a
31387ec681f3Smrg * branch from a break or continue.
31397ec681f3Smrg */
31407ec681f3Smrgstatic void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
31417ec681f3Smrg{
31427ec681f3Smrg   if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
31437ec681f3Smrg      LLVMBuildBr(builder, target);
31447ec681f3Smrg}
31457ec681f3Smrg
31467ec681f3Smrgvoid ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id)
31477ec681f3Smrg{
31487ec681f3Smrg   struct ac_llvm_flow *flow = push_flow(ctx);
31497ec681f3Smrg   flow->loop_entry_block = append_basic_block(ctx, "LOOP");
31507ec681f3Smrg   flow->next_block = append_basic_block(ctx, "ENDLOOP");
31517ec681f3Smrg   set_basicblock_name(flow->loop_entry_block, "loop", label_id);
31527ec681f3Smrg   LLVMBuildBr(ctx->builder, flow->loop_entry_block);
31537ec681f3Smrg   LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block);
31547ec681f3Smrg}
31557ec681f3Smrg
31567ec681f3Smrgvoid ac_build_break(struct ac_llvm_context *ctx)
31577ec681f3Smrg{
31587ec681f3Smrg   struct ac_llvm_flow *flow = get_innermost_loop(ctx);
31597ec681f3Smrg   LLVMBuildBr(ctx->builder, flow->next_block);
31607ec681f3Smrg}
31617ec681f3Smrg
31627ec681f3Smrgvoid ac_build_continue(struct ac_llvm_context *ctx)
31637ec681f3Smrg{
31647ec681f3Smrg   struct ac_llvm_flow *flow = get_innermost_loop(ctx);
31657ec681f3Smrg   LLVMBuildBr(ctx->builder, flow->loop_entry_block);
31667ec681f3Smrg}
31677ec681f3Smrg
31687ec681f3Smrgvoid ac_build_else(struct ac_llvm_context *ctx, int label_id)
31697ec681f3Smrg{
31707ec681f3Smrg   struct ac_llvm_flow *current_branch = get_current_flow(ctx);
31717ec681f3Smrg   LLVMBasicBlockRef endif_block;
31727ec681f3Smrg
31737ec681f3Smrg   assert(!current_branch->loop_entry_block);
31747ec681f3Smrg
31757ec681f3Smrg   endif_block = append_basic_block(ctx, "ENDIF");
31767ec681f3Smrg   emit_default_branch(ctx->builder, endif_block);
31777ec681f3Smrg
31787ec681f3Smrg   LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
31797ec681f3Smrg   set_basicblock_name(current_branch->next_block, "else", label_id);
31807ec681f3Smrg
31817ec681f3Smrg   current_branch->next_block = endif_block;
31827ec681f3Smrg}
31837ec681f3Smrg
31847ec681f3Smrg/* Invoked after a branch is exited. */
31857ec681f3Smrgstatic void ac_branch_exited(struct ac_llvm_context *ctx)
31867ec681f3Smrg{
31877ec681f3Smrg   if (ctx->flow->depth == 0 && ctx->conditional_demote_seen) {
31887ec681f3Smrg      /* The previous conditional branch contained demote. Kill threads
31897ec681f3Smrg       * after all conditional blocks because amdgcn.wqm.vote doesn't
31907ec681f3Smrg       * return usable values inside the blocks.
31917ec681f3Smrg       *
31927ec681f3Smrg       * This is an optional optimization that only kills whole inactive quads.
31937ec681f3Smrg       */
31947ec681f3Smrg      LLVMValueRef cond = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, "");
31957ec681f3Smrg      ac_build_kill_if_false(ctx, ac_build_wqm_vote(ctx, cond));
31967ec681f3Smrg      ctx->conditional_demote_seen = false;
31977ec681f3Smrg   }
31987ec681f3Smrg}
31997ec681f3Smrg
32007ec681f3Smrgvoid ac_build_endif(struct ac_llvm_context *ctx, int label_id)
32017ec681f3Smrg{
32027ec681f3Smrg   struct ac_llvm_flow *current_branch = get_current_flow(ctx);
32037ec681f3Smrg
32047ec681f3Smrg   assert(!current_branch->loop_entry_block);
32057ec681f3Smrg
32067ec681f3Smrg   emit_default_branch(ctx->builder, current_branch->next_block);
32077ec681f3Smrg   LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
32087ec681f3Smrg   set_basicblock_name(current_branch->next_block, "endif", label_id);
32097ec681f3Smrg
32107ec681f3Smrg   ctx->flow->depth--;
32117ec681f3Smrg   ac_branch_exited(ctx);
32127ec681f3Smrg}
32137ec681f3Smrg
32147ec681f3Smrgvoid ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
32157ec681f3Smrg{
32167ec681f3Smrg   struct ac_llvm_flow *current_loop = get_current_flow(ctx);
32177ec681f3Smrg
32187ec681f3Smrg   assert(current_loop->loop_entry_block);
32197ec681f3Smrg
32207ec681f3Smrg   emit_default_branch(ctx->builder, current_loop->loop_entry_block);
32217ec681f3Smrg
32227ec681f3Smrg   LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
32237ec681f3Smrg   set_basicblock_name(current_loop->next_block, "endloop", label_id);
32247ec681f3Smrg   ctx->flow->depth--;
32257ec681f3Smrg   ac_branch_exited(ctx);
32267ec681f3Smrg}
32277ec681f3Smrg
32287ec681f3Smrgvoid ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
32297ec681f3Smrg{
32307ec681f3Smrg   struct ac_llvm_flow *flow = push_flow(ctx);
32317ec681f3Smrg   LLVMBasicBlockRef if_block;
32327ec681f3Smrg
32337ec681f3Smrg   if_block = append_basic_block(ctx, "IF");
32347ec681f3Smrg   flow->next_block = append_basic_block(ctx, "ELSE");
32357ec681f3Smrg   set_basicblock_name(if_block, "if", label_id);
32367ec681f3Smrg   LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block);
32377ec681f3Smrg   LLVMPositionBuilderAtEnd(ctx->builder, if_block);
32387ec681f3Smrg}
32397ec681f3Smrg
32407ec681f3SmrgLLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name)
32417ec681f3Smrg{
32427ec681f3Smrg   LLVMBuilderRef builder = ac->builder;
32437ec681f3Smrg   LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
32447ec681f3Smrg   LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
32457ec681f3Smrg   LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
32467ec681f3Smrg   LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
32477ec681f3Smrg   LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context);
32487ec681f3Smrg   LLVMValueRef res;
32497ec681f3Smrg
32507ec681f3Smrg   if (first_instr) {
32517ec681f3Smrg      LLVMPositionBuilderBefore(first_builder, first_instr);
32527ec681f3Smrg   } else {
32537ec681f3Smrg      LLVMPositionBuilderAtEnd(first_builder, first_block);
32547ec681f3Smrg   }
32557ec681f3Smrg
32567ec681f3Smrg   res = LLVMBuildAlloca(first_builder, type, name);
32577ec681f3Smrg   LLVMDisposeBuilder(first_builder);
32587ec681f3Smrg   return res;
32597ec681f3Smrg}
32607ec681f3Smrg
32617ec681f3SmrgLLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name)
32627ec681f3Smrg{
32637ec681f3Smrg   LLVMValueRef ptr = ac_build_alloca_undef(ac, type, name);
32647ec681f3Smrg   LLVMBuildStore(ac->builder, LLVMConstNull(type), ptr);
32657ec681f3Smrg   return ptr;
32667ec681f3Smrg}
32677ec681f3Smrg
32687ec681f3SmrgLLVMValueRef ac_build_alloca_init(struct ac_llvm_context *ac, LLVMValueRef val, const char *name)
32697ec681f3Smrg{
32707ec681f3Smrg   LLVMValueRef ptr = ac_build_alloca_undef(ac, LLVMTypeOf(val), name);
32717ec681f3Smrg   LLVMBuildStore(ac->builder, val, ptr);
32727ec681f3Smrg   return ptr;
32737ec681f3Smrg}
32747ec681f3Smrg
32757ec681f3SmrgLLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMTypeRef type)
32767ec681f3Smrg{
32777ec681f3Smrg   int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
32787ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
32797ec681f3Smrg}
32807ec681f3Smrg
32817ec681f3SmrgLLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count)
32827ec681f3Smrg{
32837ec681f3Smrg   unsigned num_components = ac_get_llvm_num_components(value);
32847ec681f3Smrg   if (count == num_components)
32857ec681f3Smrg      return value;
32867ec681f3Smrg
32877ec681f3Smrg   LLVMValueRef *const masks = alloca(MAX2(count, 2) * sizeof(LLVMValueRef));
32887ec681f3Smrg   masks[0] = ctx->i32_0;
32897ec681f3Smrg   masks[1] = ctx->i32_1;
32907ec681f3Smrg   for (unsigned i = 2; i < count; i++)
32917ec681f3Smrg      masks[i] = LLVMConstInt(ctx->i32, i, false);
32927ec681f3Smrg
32937ec681f3Smrg   if (count == 1)
32947ec681f3Smrg      return LLVMBuildExtractElement(ctx->builder, value, masks[0], "");
32957ec681f3Smrg
32967ec681f3Smrg   LLVMValueRef swizzle = LLVMConstVector(masks, count);
32977ec681f3Smrg   return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
32987ec681f3Smrg}
32997ec681f3Smrg
33007ec681f3Smrg/* If param is i64 and bitwidth <= 32, the return value will be i32. */
33017ec681f3SmrgLLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift,
33027ec681f3Smrg                             unsigned bitwidth)
33037ec681f3Smrg{
33047ec681f3Smrg   LLVMValueRef value = param;
33057ec681f3Smrg   if (rshift)
33067ec681f3Smrg      value = LLVMBuildLShr(ctx->builder, value, LLVMConstInt(LLVMTypeOf(param), rshift, false), "");
33077ec681f3Smrg
33087ec681f3Smrg   if (rshift + bitwidth < 32) {
33097ec681f3Smrg      uint64_t mask = (1ull << bitwidth) - 1;
33107ec681f3Smrg      value = LLVMBuildAnd(ctx->builder, value, LLVMConstInt(LLVMTypeOf(param), mask, false), "");
33117ec681f3Smrg   }
33127ec681f3Smrg
33137ec681f3Smrg   if (bitwidth <= 32 && LLVMTypeOf(param) == ctx->i64)
33147ec681f3Smrg      value = LLVMBuildTrunc(ctx->builder, value, ctx->i32, "");
33157ec681f3Smrg   return value;
33167ec681f3Smrg}
33177ec681f3Smrg
33187ec681f3Smrg/* Adjust the sample index according to FMASK.
33197ec681f3Smrg *
33207ec681f3Smrg * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
33217ec681f3Smrg * which is the identity mapping. Each nibble says which physical sample
33227ec681f3Smrg * should be fetched to get that sample.
33237ec681f3Smrg *
33247ec681f3Smrg * For example, 0x11111100 means there are only 2 samples stored and
33257ec681f3Smrg * the second sample covers 3/4 of the pixel. When reading samples 0
33267ec681f3Smrg * and 1, return physical sample 0 (determined by the first two 0s
33277ec681f3Smrg * in FMASK), otherwise return physical sample 1.
33287ec681f3Smrg *
33297ec681f3Smrg * The sample index should be adjusted as follows:
33307ec681f3Smrg *   addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
33317ec681f3Smrg */
33327ec681f3Smrgvoid ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LLVMValueRef *addr,
33337ec681f3Smrg                              bool is_array_tex)
33347ec681f3Smrg{
33357ec681f3Smrg   struct ac_image_args fmask_load = {0};
33367ec681f3Smrg   fmask_load.opcode = ac_image_load;
33377ec681f3Smrg   fmask_load.resource = fmask;
33387ec681f3Smrg   fmask_load.dmask = 0xf;
33397ec681f3Smrg   fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
33407ec681f3Smrg   fmask_load.attributes = AC_FUNC_ATTR_READNONE;
33417ec681f3Smrg
33427ec681f3Smrg   fmask_load.coords[0] = addr[0];
33437ec681f3Smrg   fmask_load.coords[1] = addr[1];
33447ec681f3Smrg   if (is_array_tex)
33457ec681f3Smrg      fmask_load.coords[2] = addr[2];
33467ec681f3Smrg   fmask_load.a16 = ac_get_elem_bits(ac, LLVMTypeOf(addr[0])) == 16;
33477ec681f3Smrg
33487ec681f3Smrg   LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
33497ec681f3Smrg   fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, "");
33507ec681f3Smrg
33517ec681f3Smrg   /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
33527ec681f3Smrg    * resource descriptor is 0 (invalid).
33537ec681f3Smrg    */
33547ec681f3Smrg   LLVMValueRef tmp;
33557ec681f3Smrg   tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, "");
33567ec681f3Smrg   tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, "");
33577ec681f3Smrg   tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, "");
33587ec681f3Smrg   fmask_value =
33597ec681f3Smrg      LLVMBuildSelect(ac->builder, tmp, fmask_value, LLVMConstInt(ac->i32, 0x76543210, false), "");
33607ec681f3Smrg
33617ec681f3Smrg   /* Apply the formula. */
33627ec681f3Smrg   unsigned sample_chan = is_array_tex ? 3 : 2;
33637ec681f3Smrg   LLVMValueRef final_sample;
33647ec681f3Smrg   final_sample = LLVMBuildMul(ac->builder, addr[sample_chan],
33657ec681f3Smrg                               LLVMConstInt(LLVMTypeOf(addr[0]), 4, 0), "");
33667ec681f3Smrg   final_sample = LLVMBuildLShr(ac->builder, fmask_value,
33677ec681f3Smrg                                LLVMBuildZExt(ac->builder, final_sample, ac->i32, ""), "");
33687ec681f3Smrg   /* Mask the sample index by 0x7, because 0x8 means an unknown value
33697ec681f3Smrg    * with EQAA, so those will map to 0. */
33707ec681f3Smrg   addr[sample_chan] = LLVMBuildAnd(ac->builder, final_sample, LLVMConstInt(ac->i32, 0x7, 0), "");
33717ec681f3Smrg   if (fmask_load.a16)
33727ec681f3Smrg      addr[sample_chan] = LLVMBuildTrunc(ac->builder, final_sample, ac->i16, "");
33737ec681f3Smrg}
33747ec681f3Smrg
33757ec681f3Smrgstatic LLVMValueRef _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
33767ec681f3Smrg                                       LLVMValueRef lane, bool with_opt_barrier)
33777ec681f3Smrg{
33787ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(src);
33797ec681f3Smrg   LLVMValueRef result;
33807ec681f3Smrg
33817ec681f3Smrg   if (with_opt_barrier)
33827ec681f3Smrg      ac_build_optimization_barrier(ctx, &src, false);
33837ec681f3Smrg
33847ec681f3Smrg   src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
33857ec681f3Smrg   if (lane)
33867ec681f3Smrg      lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, "");
33877ec681f3Smrg
33887ec681f3Smrg   result =
33897ec681f3Smrg      ac_build_intrinsic(ctx, lane == NULL ? "llvm.amdgcn.readfirstlane" : "llvm.amdgcn.readlane",
33907ec681f3Smrg                         ctx->i32, (LLVMValueRef[]){src, lane}, lane == NULL ? 1 : 2,
33917ec681f3Smrg                         AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
33927ec681f3Smrg
33937ec681f3Smrg   return LLVMBuildTrunc(ctx->builder, result, type, "");
33947ec681f3Smrg}
33957ec681f3Smrg
33967ec681f3Smrgstatic LLVMValueRef ac_build_readlane_common(struct ac_llvm_context *ctx, LLVMValueRef src,
33977ec681f3Smrg                                             LLVMValueRef lane, bool with_opt_barrier)
33987ec681f3Smrg{
33997ec681f3Smrg   LLVMTypeRef src_type = LLVMTypeOf(src);
34007ec681f3Smrg   src = ac_to_integer(ctx, src);
34017ec681f3Smrg   unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
34027ec681f3Smrg   LLVMValueRef ret;
34037ec681f3Smrg
34047ec681f3Smrg   if (bits > 32) {
34057ec681f3Smrg      assert(bits % 32 == 0);
34067ec681f3Smrg      LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
34077ec681f3Smrg      LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
34087ec681f3Smrg      ret = LLVMGetUndef(vec_type);
34097ec681f3Smrg      for (unsigned i = 0; i < bits / 32; i++) {
34107ec681f3Smrg         LLVMValueRef ret_comp;
34117ec681f3Smrg
34127ec681f3Smrg         src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
34137ec681f3Smrg
34147ec681f3Smrg         ret_comp = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
34157ec681f3Smrg
34167ec681f3Smrg         ret =
34177ec681f3Smrg            LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
34187ec681f3Smrg      }
34197ec681f3Smrg   } else {
34207ec681f3Smrg      ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
34217ec681f3Smrg   }
34227ec681f3Smrg
34237ec681f3Smrg   if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
34247ec681f3Smrg      return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
34257ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
34267ec681f3Smrg}
34277ec681f3Smrg
34287ec681f3Smrg/**
34297ec681f3Smrg * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
34307ec681f3Smrg *
34317ec681f3Smrg * The optimization barrier is not needed if the value is the same in all lanes
34327ec681f3Smrg * or if this is called in the outermost block.
34337ec681f3Smrg *
34347ec681f3Smrg * @param ctx
34357ec681f3Smrg * @param src
34367ec681f3Smrg * @param lane - id of the lane or NULL for the first active lane
34377ec681f3Smrg * @return value of the lane
34387ec681f3Smrg */
34397ec681f3SmrgLLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, LLVMValueRef src,
34407ec681f3Smrg                                              LLVMValueRef lane)
34417ec681f3Smrg{
34427ec681f3Smrg   return ac_build_readlane_common(ctx, src, lane, false);
34437ec681f3Smrg}
34447ec681f3Smrg
34457ec681f3SmrgLLVMValueRef ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
34467ec681f3Smrg{
34477ec681f3Smrg   return ac_build_readlane_common(ctx, src, lane, true);
34487ec681f3Smrg}
34497ec681f3Smrg
34507ec681f3SmrgLLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value,
34517ec681f3Smrg                                LLVMValueRef lane)
34527ec681f3Smrg{
34537ec681f3Smrg   return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32,
34547ec681f3Smrg                             (LLVMValueRef[]){value, lane, src}, 3,
34557ec681f3Smrg                             AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
34567ec681f3Smrg}
34577ec681f3Smrg
34587ec681f3SmrgLLVMValueRef ac_build_mbcnt_add(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef add_src)
34597ec681f3Smrg{
34607ec681f3Smrg   if (ctx->wave_size == 32) {
34617ec681f3Smrg      LLVMValueRef val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
34627ec681f3Smrg                                (LLVMValueRef[]){mask, ctx->i32_0}, 2, AC_FUNC_ATTR_READNONE);
34637ec681f3Smrg      ac_set_range_metadata(ctx, val, 0, ctx->wave_size);
34647ec681f3Smrg      return val;
34657ec681f3Smrg   }
34667ec681f3Smrg   LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, ctx->v2i32, "");
34677ec681f3Smrg   LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec, ctx->i32_0, "");
34687ec681f3Smrg   LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec, ctx->i32_1, "");
34697ec681f3Smrg   LLVMValueRef val =
34707ec681f3Smrg      ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
34717ec681f3Smrg                         (LLVMValueRef[]){mask_lo, add_src}, 2, AC_FUNC_ATTR_READNONE);
34727ec681f3Smrg   val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32, (LLVMValueRef[]){mask_hi, val},
34737ec681f3Smrg                            2, AC_FUNC_ATTR_READNONE);
34747ec681f3Smrg   ac_set_range_metadata(ctx, val, 0, ctx->wave_size);
34757ec681f3Smrg   return val;
34767ec681f3Smrg}
34777ec681f3Smrg
34787ec681f3SmrgLLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
34797ec681f3Smrg{
34807ec681f3Smrg   return ac_build_mbcnt_add(ctx, mask, ctx->i32_0);
34817ec681f3Smrg}
34827ec681f3Smrg
/* Values passed as the dpp_ctrl operand of llvm.amdgcn.update.dpp.
 *
 * Entries with a leading underscore are base values that are not meant to be
 * used directly: dpp_quad_perm(), dpp_row_sl() and dpp_row_sr() OR their
 * arguments into them.
 */
enum dpp_ctrl
{
   /* Base values, completed by the helper functions. */
   _dpp_quad_perm = 0x000,
   _dpp_row_sl = 0x100,
   _dpp_row_sr = 0x110,
   _dpp_row_rr = 0x120,

   /* Complete control values. */
   dpp_wf_sl1 = 0x130,
   dpp_wf_rl1 = 0x134,
   dpp_wf_sr1 = 0x138,
   dpp_wf_rr1 = 0x13c,
   dpp_row_mirror = 0x140,
   dpp_row_half_mirror = 0x141,
   dpp_row_bcast15 = 0x142,
   dpp_row_bcast31 = 0x143,
};
34987ec681f3Smrg
34997ec681f3Smrgstatic inline enum dpp_ctrl dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2,
35007ec681f3Smrg                                          unsigned lane3)
35017ec681f3Smrg{
35027ec681f3Smrg   assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
35037ec681f3Smrg   return _dpp_quad_perm | lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6);
35047ec681f3Smrg}
35057ec681f3Smrg
35067ec681f3Smrgstatic inline enum dpp_ctrl dpp_row_sl(unsigned amount)
35077ec681f3Smrg{
35087ec681f3Smrg   assert(amount > 0 && amount < 16);
35097ec681f3Smrg   return _dpp_row_sl | amount;
35107ec681f3Smrg}
35117ec681f3Smrg
35127ec681f3Smrgstatic inline enum dpp_ctrl dpp_row_sr(unsigned amount)
35137ec681f3Smrg{
35147ec681f3Smrg   assert(amount > 0 && amount < 16);
35157ec681f3Smrg   return _dpp_row_sr | amount;
35167ec681f3Smrg}
35177ec681f3Smrg
35187ec681f3Smrgstatic LLVMValueRef _ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
35197ec681f3Smrg                                  enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
35207ec681f3Smrg                                  bool bound_ctrl)
35217ec681f3Smrg{
35227ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(src);
35237ec681f3Smrg   LLVMValueRef res;
35247ec681f3Smrg
35257ec681f3Smrg   old = LLVMBuildZExt(ctx->builder, old, ctx->i32, "");
35267ec681f3Smrg   src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
35277ec681f3Smrg
35287ec681f3Smrg   res = ac_build_intrinsic(
35297ec681f3Smrg      ctx, "llvm.amdgcn.update.dpp.i32", ctx->i32,
35307ec681f3Smrg      (LLVMValueRef[]){old, src, LLVMConstInt(ctx->i32, dpp_ctrl, 0),
35317ec681f3Smrg                       LLVMConstInt(ctx->i32, row_mask, 0), LLVMConstInt(ctx->i32, bank_mask, 0),
35327ec681f3Smrg                       LLVMConstInt(ctx->i1, bound_ctrl, 0)},
35337ec681f3Smrg      6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
35347ec681f3Smrg
35357ec681f3Smrg   return LLVMBuildTrunc(ctx->builder, res, type, "");
35367ec681f3Smrg}
35377ec681f3Smrg
35387ec681f3Smrgstatic LLVMValueRef ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
35397ec681f3Smrg                                 enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
35407ec681f3Smrg                                 bool bound_ctrl)
35417ec681f3Smrg{
35427ec681f3Smrg   LLVMTypeRef src_type = LLVMTypeOf(src);
35437ec681f3Smrg   src = ac_to_integer(ctx, src);
35447ec681f3Smrg   old = ac_to_integer(ctx, old);
35457ec681f3Smrg   unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
35467ec681f3Smrg   LLVMValueRef ret;
35477ec681f3Smrg   if (bits > 32) {
35487ec681f3Smrg      assert(bits % 32 == 0);
35497ec681f3Smrg      LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
35507ec681f3Smrg      LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
35517ec681f3Smrg      LLVMValueRef old_vector = LLVMBuildBitCast(ctx->builder, old, vec_type, "");
35527ec681f3Smrg      ret = LLVMGetUndef(vec_type);
35537ec681f3Smrg      for (unsigned i = 0; i < bits / 32; i++) {
35547ec681f3Smrg         src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
35557ec681f3Smrg         old = LLVMBuildExtractElement(ctx->builder, old_vector, LLVMConstInt(ctx->i32, i, 0), "");
35567ec681f3Smrg         LLVMValueRef ret_comp =
35577ec681f3Smrg            _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
35587ec681f3Smrg         ret =
35597ec681f3Smrg            LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
35607ec681f3Smrg      }
35617ec681f3Smrg   } else {
35627ec681f3Smrg      ret = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
35637ec681f3Smrg   }
35647ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
35657ec681f3Smrg}
35667ec681f3Smrg
35677ec681f3Smrgstatic LLVMValueRef _ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src,
35687ec681f3Smrg                                         uint64_t sel, bool exchange_rows, bool bound_ctrl)
35697ec681f3Smrg{
35707ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(src);
35717ec681f3Smrg   LLVMValueRef result;
35727ec681f3Smrg
35737ec681f3Smrg   src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
35747ec681f3Smrg
35757ec681f3Smrg   LLVMValueRef args[6] = {
35767ec681f3Smrg      src,
35777ec681f3Smrg      src,
35787ec681f3Smrg      LLVMConstInt(ctx->i32, sel, false),
35797ec681f3Smrg      LLVMConstInt(ctx->i32, sel >> 32, false),
35807ec681f3Smrg      ctx->i1true, /* fi */
35817ec681f3Smrg      bound_ctrl ? ctx->i1true : ctx->i1false,
35827ec681f3Smrg   };
35837ec681f3Smrg
35847ec681f3Smrg   result =
35857ec681f3Smrg      ac_build_intrinsic(ctx, exchange_rows ? "llvm.amdgcn.permlanex16" : "llvm.amdgcn.permlane16",
35867ec681f3Smrg                         ctx->i32, args, 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
35877ec681f3Smrg
35887ec681f3Smrg   return LLVMBuildTrunc(ctx->builder, result, type, "");
35897ec681f3Smrg}
35907ec681f3Smrg
35917ec681f3Smrgstatic LLVMValueRef ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
35927ec681f3Smrg                                        bool exchange_rows, bool bound_ctrl)
35937ec681f3Smrg{
35947ec681f3Smrg   LLVMTypeRef src_type = LLVMTypeOf(src);
35957ec681f3Smrg   src = ac_to_integer(ctx, src);
35967ec681f3Smrg   unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
35977ec681f3Smrg   LLVMValueRef ret;
35987ec681f3Smrg   if (bits > 32) {
35997ec681f3Smrg      assert(bits % 32 == 0);
36007ec681f3Smrg      LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
36017ec681f3Smrg      LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
36027ec681f3Smrg      ret = LLVMGetUndef(vec_type);
36037ec681f3Smrg      for (unsigned i = 0; i < bits / 32; i++) {
36047ec681f3Smrg         src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
36057ec681f3Smrg         LLVMValueRef ret_comp = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl);
36067ec681f3Smrg         ret =
36077ec681f3Smrg            LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
36087ec681f3Smrg      }
36097ec681f3Smrg   } else {
36107ec681f3Smrg      ret = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl);
36117ec681f3Smrg   }
36127ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
36137ec681f3Smrg}
36147ec681f3Smrg
/* Pack three 5-bit masks into one pattern value: and_mask in bits 0-4,
 * or_mask in bits 5-9 and xor_mask in bits 10-14. Each mask must be below 32.
 */
static inline unsigned ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
{
   assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);

   unsigned pattern = and_mask;

   pattern |= or_mask << 5;
   pattern |= xor_mask << 10;

   return pattern;
}
36207ec681f3Smrg
36217ec681f3Smrgstatic LLVMValueRef _ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
36227ec681f3Smrg                                         unsigned mask)
36237ec681f3Smrg{
36247ec681f3Smrg   LLVMTypeRef src_type = LLVMTypeOf(src);
36257ec681f3Smrg   LLVMValueRef ret;
36267ec681f3Smrg
36277ec681f3Smrg   src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
36287ec681f3Smrg
36297ec681f3Smrg   ret = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle", ctx->i32,
36307ec681f3Smrg                            (LLVMValueRef[]){src, LLVMConstInt(ctx->i32, mask, 0)}, 2,
36317ec681f3Smrg                            AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
36327ec681f3Smrg
36337ec681f3Smrg   return LLVMBuildTrunc(ctx->builder, ret, src_type, "");
36347ec681f3Smrg}
36357ec681f3Smrg
36367ec681f3SmrgLLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
36377ec681f3Smrg{
36387ec681f3Smrg   LLVMTypeRef src_type = LLVMTypeOf(src);
36397ec681f3Smrg   src = ac_to_integer(ctx, src);
36407ec681f3Smrg   unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
36417ec681f3Smrg   LLVMValueRef ret;
36427ec681f3Smrg   if (bits > 32) {
36437ec681f3Smrg      assert(bits % 32 == 0);
36447ec681f3Smrg      LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
36457ec681f3Smrg      LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
36467ec681f3Smrg      ret = LLVMGetUndef(vec_type);
36477ec681f3Smrg      for (unsigned i = 0; i < bits / 32; i++) {
36487ec681f3Smrg         src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
36497ec681f3Smrg         LLVMValueRef ret_comp = _ac_build_ds_swizzle(ctx, src, mask);
36507ec681f3Smrg         ret =
36517ec681f3Smrg            LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
36527ec681f3Smrg      }
36537ec681f3Smrg   } else {
36547ec681f3Smrg      ret = _ac_build_ds_swizzle(ctx, src, mask);
36557ec681f3Smrg   }
36567ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
36577ec681f3Smrg}
36587ec681f3Smrg
36597ec681f3Smrgstatic LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
36607ec681f3Smrg{
36617ec681f3Smrg   LLVMTypeRef src_type = LLVMTypeOf(src);
36627ec681f3Smrg   unsigned bitsize = ac_get_elem_bits(ctx, src_type);
36637ec681f3Smrg   char name[32], type[8];
36647ec681f3Smrg   LLVMValueRef ret;
36657ec681f3Smrg
36667ec681f3Smrg   src = ac_to_integer(ctx, src);
36677ec681f3Smrg
36687ec681f3Smrg   if (bitsize < 32)
36697ec681f3Smrg      src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
36707ec681f3Smrg
36717ec681f3Smrg   ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
36727ec681f3Smrg   snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type);
36737ec681f3Smrg   ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1,
36747ec681f3Smrg                            AC_FUNC_ATTR_READNONE);
36757ec681f3Smrg
36767ec681f3Smrg   if (bitsize < 32)
36777ec681f3Smrg      ret = LLVMBuildTrunc(ctx->builder, ret, ac_to_integer_type(ctx, src_type), "");
36787ec681f3Smrg
36797ec681f3Smrg   return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
36807ec681f3Smrg}
36817ec681f3Smrg
36827ec681f3Smrgstatic LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
36837ec681f3Smrg                                          LLVMValueRef inactive)
36847ec681f3Smrg{
36857ec681f3Smrg   char name[33], type[8];
36867ec681f3Smrg   LLVMTypeRef src_type = LLVMTypeOf(src);
36877ec681f3Smrg   unsigned bitsize = ac_get_elem_bits(ctx, src_type);
36887ec681f3Smrg   src = ac_to_integer(ctx, src);
36897ec681f3Smrg   inactive = ac_to_integer(ctx, inactive);
36907ec681f3Smrg
36917ec681f3Smrg   if (bitsize < 32) {
36927ec681f3Smrg      src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
36937ec681f3Smrg      inactive = LLVMBuildZExt(ctx->builder, inactive, ctx->i32, "");
36947ec681f3Smrg   }
36957ec681f3Smrg
36967ec681f3Smrg   ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
36977ec681f3Smrg   snprintf(name, sizeof(name), "llvm.amdgcn.set.inactive.%s", type);
36987ec681f3Smrg   LLVMValueRef ret =
36997ec681f3Smrg      ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src, inactive}, 2,
37007ec681f3Smrg                         AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
37017ec681f3Smrg   if (bitsize < 32)
37027ec681f3Smrg      ret = LLVMBuildTrunc(ctx->builder, ret, src_type, "");
37037ec681f3Smrg
37047ec681f3Smrg   return ret;
37057ec681f3Smrg}
37067ec681f3Smrg
37077ec681f3Smrgstatic LLVMValueRef get_reduction_identity(struct ac_llvm_context *ctx, nir_op op,
37087ec681f3Smrg                                           unsigned type_size)
37097ec681f3Smrg{
37107ec681f3Smrg
37117ec681f3Smrg   if (type_size == 0) {
37127ec681f3Smrg      switch (op) {
37137ec681f3Smrg      case nir_op_ior:
37147ec681f3Smrg      case nir_op_ixor:
37157ec681f3Smrg         return LLVMConstInt(ctx->i1, 0, 0);
37167ec681f3Smrg      case nir_op_iand:
37177ec681f3Smrg         return LLVMConstInt(ctx->i1, 1, 0);
37187ec681f3Smrg      default:
37197ec681f3Smrg         unreachable("bad reduction intrinsic");
37207ec681f3Smrg      }
37217ec681f3Smrg   } else if (type_size == 1) {
37227ec681f3Smrg      switch (op) {
37237ec681f3Smrg      case nir_op_iadd:
37247ec681f3Smrg         return ctx->i8_0;
37257ec681f3Smrg      case nir_op_imul:
37267ec681f3Smrg         return ctx->i8_1;
37277ec681f3Smrg      case nir_op_imin:
37287ec681f3Smrg         return LLVMConstInt(ctx->i8, INT8_MAX, 0);
37297ec681f3Smrg      case nir_op_umin:
37307ec681f3Smrg         return LLVMConstInt(ctx->i8, UINT8_MAX, 0);
37317ec681f3Smrg      case nir_op_imax:
37327ec681f3Smrg         return LLVMConstInt(ctx->i8, INT8_MIN, 0);
37337ec681f3Smrg      case nir_op_umax:
37347ec681f3Smrg         return ctx->i8_0;
37357ec681f3Smrg      case nir_op_iand:
37367ec681f3Smrg         return LLVMConstInt(ctx->i8, -1, 0);
37377ec681f3Smrg      case nir_op_ior:
37387ec681f3Smrg         return ctx->i8_0;
37397ec681f3Smrg      case nir_op_ixor:
37407ec681f3Smrg         return ctx->i8_0;
37417ec681f3Smrg      default:
37427ec681f3Smrg         unreachable("bad reduction intrinsic");
37437ec681f3Smrg      }
37447ec681f3Smrg   } else if (type_size == 2) {
37457ec681f3Smrg      switch (op) {
37467ec681f3Smrg      case nir_op_iadd:
37477ec681f3Smrg         return ctx->i16_0;
37487ec681f3Smrg      case nir_op_fadd:
37497ec681f3Smrg         return ctx->f16_0;
37507ec681f3Smrg      case nir_op_imul:
37517ec681f3Smrg         return ctx->i16_1;
37527ec681f3Smrg      case nir_op_fmul:
37537ec681f3Smrg         return ctx->f16_1;
37547ec681f3Smrg      case nir_op_imin:
37557ec681f3Smrg         return LLVMConstInt(ctx->i16, INT16_MAX, 0);
37567ec681f3Smrg      case nir_op_umin:
37577ec681f3Smrg         return LLVMConstInt(ctx->i16, UINT16_MAX, 0);
37587ec681f3Smrg      case nir_op_fmin:
37597ec681f3Smrg         return LLVMConstReal(ctx->f16, INFINITY);
37607ec681f3Smrg      case nir_op_imax:
37617ec681f3Smrg         return LLVMConstInt(ctx->i16, INT16_MIN, 0);
37627ec681f3Smrg      case nir_op_umax:
37637ec681f3Smrg         return ctx->i16_0;
37647ec681f3Smrg      case nir_op_fmax:
37657ec681f3Smrg         return LLVMConstReal(ctx->f16, -INFINITY);
37667ec681f3Smrg      case nir_op_iand:
37677ec681f3Smrg         return LLVMConstInt(ctx->i16, -1, 0);
37687ec681f3Smrg      case nir_op_ior:
37697ec681f3Smrg         return ctx->i16_0;
37707ec681f3Smrg      case nir_op_ixor:
37717ec681f3Smrg         return ctx->i16_0;
37727ec681f3Smrg      default:
37737ec681f3Smrg         unreachable("bad reduction intrinsic");
37747ec681f3Smrg      }
37757ec681f3Smrg   } else if (type_size == 4) {
37767ec681f3Smrg      switch (op) {
37777ec681f3Smrg      case nir_op_iadd:
37787ec681f3Smrg         return ctx->i32_0;
37797ec681f3Smrg      case nir_op_fadd:
37807ec681f3Smrg         return ctx->f32_0;
37817ec681f3Smrg      case nir_op_imul:
37827ec681f3Smrg         return ctx->i32_1;
37837ec681f3Smrg      case nir_op_fmul:
37847ec681f3Smrg         return ctx->f32_1;
37857ec681f3Smrg      case nir_op_imin:
37867ec681f3Smrg         return LLVMConstInt(ctx->i32, INT32_MAX, 0);
37877ec681f3Smrg      case nir_op_umin:
37887ec681f3Smrg         return LLVMConstInt(ctx->i32, UINT32_MAX, 0);
37897ec681f3Smrg      case nir_op_fmin:
37907ec681f3Smrg         return LLVMConstReal(ctx->f32, INFINITY);
37917ec681f3Smrg      case nir_op_imax:
37927ec681f3Smrg         return LLVMConstInt(ctx->i32, INT32_MIN, 0);
37937ec681f3Smrg      case nir_op_umax:
37947ec681f3Smrg         return ctx->i32_0;
37957ec681f3Smrg      case nir_op_fmax:
37967ec681f3Smrg         return LLVMConstReal(ctx->f32, -INFINITY);
37977ec681f3Smrg      case nir_op_iand:
37987ec681f3Smrg         return LLVMConstInt(ctx->i32, -1, 0);
37997ec681f3Smrg      case nir_op_ior:
38007ec681f3Smrg         return ctx->i32_0;
38017ec681f3Smrg      case nir_op_ixor:
38027ec681f3Smrg         return ctx->i32_0;
38037ec681f3Smrg      default:
38047ec681f3Smrg         unreachable("bad reduction intrinsic");
38057ec681f3Smrg      }
38067ec681f3Smrg   } else { /* type_size == 64bit */
38077ec681f3Smrg      switch (op) {
38087ec681f3Smrg      case nir_op_iadd:
38097ec681f3Smrg         return ctx->i64_0;
38107ec681f3Smrg      case nir_op_fadd:
38117ec681f3Smrg         return ctx->f64_0;
38127ec681f3Smrg      case nir_op_imul:
38137ec681f3Smrg         return ctx->i64_1;
38147ec681f3Smrg      case nir_op_fmul:
38157ec681f3Smrg         return ctx->f64_1;
38167ec681f3Smrg      case nir_op_imin:
38177ec681f3Smrg         return LLVMConstInt(ctx->i64, INT64_MAX, 0);
38187ec681f3Smrg      case nir_op_umin:
38197ec681f3Smrg         return LLVMConstInt(ctx->i64, UINT64_MAX, 0);
38207ec681f3Smrg      case nir_op_fmin:
38217ec681f3Smrg         return LLVMConstReal(ctx->f64, INFINITY);
38227ec681f3Smrg      case nir_op_imax:
38237ec681f3Smrg         return LLVMConstInt(ctx->i64, INT64_MIN, 0);
38247ec681f3Smrg      case nir_op_umax:
38257ec681f3Smrg         return ctx->i64_0;
38267ec681f3Smrg      case nir_op_fmax:
38277ec681f3Smrg         return LLVMConstReal(ctx->f64, -INFINITY);
38287ec681f3Smrg      case nir_op_iand:
38297ec681f3Smrg         return LLVMConstInt(ctx->i64, -1, 0);
38307ec681f3Smrg      case nir_op_ior:
38317ec681f3Smrg         return ctx->i64_0;
38327ec681f3Smrg      case nir_op_ixor:
38337ec681f3Smrg         return ctx->i64_0;
38347ec681f3Smrg      default:
38357ec681f3Smrg         unreachable("bad reduction intrinsic");
38367ec681f3Smrg      }
38377ec681f3Smrg   }
38387ec681f3Smrg}
38397ec681f3Smrg
38407ec681f3Smrgstatic LLVMValueRef ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs,
38417ec681f3Smrg                                    nir_op op)
38427ec681f3Smrg{
38437ec681f3Smrg   bool _64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8;
38447ec681f3Smrg   bool _32bit = ac_get_type_size(LLVMTypeOf(lhs)) == 4;
38457ec681f3Smrg   switch (op) {
38467ec681f3Smrg   case nir_op_iadd:
38477ec681f3Smrg      return LLVMBuildAdd(ctx->builder, lhs, rhs, "");
38487ec681f3Smrg   case nir_op_fadd:
38497ec681f3Smrg      return LLVMBuildFAdd(ctx->builder, lhs, rhs, "");
38507ec681f3Smrg   case nir_op_imul:
38517ec681f3Smrg      return LLVMBuildMul(ctx->builder, lhs, rhs, "");
38527ec681f3Smrg   case nir_op_fmul:
38537ec681f3Smrg      return LLVMBuildFMul(ctx->builder, lhs, rhs, "");
38547ec681f3Smrg   case nir_op_imin:
38557ec681f3Smrg      return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""),
38567ec681f3Smrg                             lhs, rhs, "");
38577ec681f3Smrg   case nir_op_umin:
38587ec681f3Smrg      return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""),
38597ec681f3Smrg                             lhs, rhs, "");
38607ec681f3Smrg   case nir_op_fmin:
38617ec681f3Smrg      return ac_build_intrinsic(
38627ec681f3Smrg         ctx, _64bit ? "llvm.minnum.f64" : _32bit ? "llvm.minnum.f32" : "llvm.minnum.f16",
38637ec681f3Smrg         _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16, (LLVMValueRef[]){lhs, rhs}, 2,
38647ec681f3Smrg         AC_FUNC_ATTR_READNONE);
38657ec681f3Smrg   case nir_op_imax:
38667ec681f3Smrg      return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""),
38677ec681f3Smrg                             lhs, rhs, "");
38687ec681f3Smrg   case nir_op_umax:
38697ec681f3Smrg      return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""),
38707ec681f3Smrg                             lhs, rhs, "");
38717ec681f3Smrg   case nir_op_fmax:
38727ec681f3Smrg      return ac_build_intrinsic(
38737ec681f3Smrg         ctx, _64bit ? "llvm.maxnum.f64" : _32bit ? "llvm.maxnum.f32" : "llvm.maxnum.f16",
38747ec681f3Smrg         _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16, (LLVMValueRef[]){lhs, rhs}, 2,
38757ec681f3Smrg         AC_FUNC_ATTR_READNONE);
38767ec681f3Smrg   case nir_op_iand:
38777ec681f3Smrg      return LLVMBuildAnd(ctx->builder, lhs, rhs, "");
38787ec681f3Smrg   case nir_op_ior:
38797ec681f3Smrg      return LLVMBuildOr(ctx->builder, lhs, rhs, "");
38807ec681f3Smrg   case nir_op_ixor:
38817ec681f3Smrg      return LLVMBuildXor(ctx->builder, lhs, rhs, "");
38827ec681f3Smrg   default:
38837ec681f3Smrg      unreachable("bad reduction intrinsic");
38847ec681f3Smrg   }
38857ec681f3Smrg}
38867ec681f3Smrg
38877ec681f3Smrg/**
38887ec681f3Smrg * \param src The value to shift.
38897ec681f3Smrg * \param identity The value to use the first lane.
38907ec681f3Smrg * \param maxprefix specifies that the result only needs to be correct for a
38917ec681f3Smrg *     prefix of this many threads
38927ec681f3Smrg * \return src, shifted 1 lane up, and identity shifted into lane 0.
38937ec681f3Smrg */
38947ec681f3Smrgstatic LLVMValueRef ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVMValueRef src,
38957ec681f3Smrg                                               LLVMValueRef identity, unsigned maxprefix)
38967ec681f3Smrg{
38977ec681f3Smrg   if (ctx->chip_class >= GFX10) {
38987ec681f3Smrg      /* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */
38997ec681f3Smrg      LLVMValueRef active, tmp1, tmp2;
39007ec681f3Smrg      LLVMValueRef tid = ac_get_thread_id(ctx);
39017ec681f3Smrg
39027ec681f3Smrg      tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
39037ec681f3Smrg
39047ec681f3Smrg      tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false);
39057ec681f3Smrg
39067ec681f3Smrg      if (maxprefix > 32) {
39077ec681f3Smrg         active =
39087ec681f3Smrg            LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, false), "");
39097ec681f3Smrg
39107ec681f3Smrg         tmp2 = LLVMBuildSelect(ctx->builder, active,
39117ec681f3Smrg                                ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, false)),
39127ec681f3Smrg                                tmp2, "");
39137ec681f3Smrg
39147ec681f3Smrg         active = LLVMBuildOr(
39157ec681f3Smrg            ctx->builder, active,
39167ec681f3Smrg            LLVMBuildICmp(ctx->builder, LLVMIntEQ,
39177ec681f3Smrg                          LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, false), ""),
39187ec681f3Smrg                          LLVMConstInt(ctx->i32, 0x10, false), ""),
39197ec681f3Smrg            "");
39207ec681f3Smrg         return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
39217ec681f3Smrg      } else if (maxprefix > 16) {
39227ec681f3Smrg         active =
39237ec681f3Smrg            LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 16, false), "");
39247ec681f3Smrg
39257ec681f3Smrg         return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
39267ec681f3Smrg      }
39277ec681f3Smrg   } else if (ctx->chip_class >= GFX8) {
39287ec681f3Smrg      return ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
39297ec681f3Smrg   }
39307ec681f3Smrg
39317ec681f3Smrg   /* wavefront shift_right by 1 on SI/CI */
39327ec681f3Smrg   LLVMValueRef active, tmp1, tmp2;
39337ec681f3Smrg   LLVMValueRef tid = ac_get_thread_id(ctx);
39347ec681f3Smrg   tmp1 = ac_build_ds_swizzle(ctx, src, (1 << 15) | dpp_quad_perm(0, 0, 1, 2));
39357ec681f3Smrg   tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x18, 0x03, 0x00));
39367ec681f3Smrg   active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
39377ec681f3Smrg                          LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x7, 0), ""),
39387ec681f3Smrg                          LLVMConstInt(ctx->i32, 0x4, 0), "");
39397ec681f3Smrg   tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
39407ec681f3Smrg   tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x10, 0x07, 0x00));
39417ec681f3Smrg   active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
39427ec681f3Smrg                          LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0xf, 0), ""),
39437ec681f3Smrg                          LLVMConstInt(ctx->i32, 0x8, 0), "");
39447ec681f3Smrg   tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
39457ec681f3Smrg   tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x00, 0x0f, 0x00));
39467ec681f3Smrg   active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
39477ec681f3Smrg                          LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, 0), ""),
39487ec681f3Smrg                          LLVMConstInt(ctx->i32, 0x10, 0), "");
39497ec681f3Smrg   tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
39507ec681f3Smrg   tmp2 = ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, 0));
39517ec681f3Smrg   active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, 0), "");
39527ec681f3Smrg   tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
39537ec681f3Smrg   active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 0, 0), "");
39547ec681f3Smrg   return LLVMBuildSelect(ctx->builder, active, identity, tmp1, "");
39557ec681f3Smrg}
39567ec681f3Smrg
39577ec681f3Smrg/**
39587ec681f3Smrg * \param maxprefix specifies that the result only needs to be correct for a
39597ec681f3Smrg *     prefix of this many threads
39607ec681f3Smrg */
39617ec681f3Smrgstatic LLVMValueRef ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src,
39627ec681f3Smrg                                  LLVMValueRef identity, unsigned maxprefix, bool inclusive)
39637ec681f3Smrg{
39647ec681f3Smrg   LLVMValueRef result, tmp;
39657ec681f3Smrg
39667ec681f3Smrg   if (!inclusive)
39677ec681f3Smrg      src = ac_wavefront_shift_right_1(ctx, src, identity, maxprefix);
39687ec681f3Smrg
39697ec681f3Smrg   result = src;
39707ec681f3Smrg
39717ec681f3Smrg   if (ctx->chip_class <= GFX7) {
39727ec681f3Smrg      assert(maxprefix == 64);
39737ec681f3Smrg      LLVMValueRef tid = ac_get_thread_id(ctx);
39747ec681f3Smrg      LLVMValueRef active;
39757ec681f3Smrg      tmp = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x1e, 0x00, 0x00));
39767ec681f3Smrg      active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
39777ec681f3Smrg                             LLVMBuildAnd(ctx->builder, tid, ctx->i32_1, ""), ctx->i32_0, "");
39787ec681f3Smrg      tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
39797ec681f3Smrg      result = ac_build_alu_op(ctx, result, tmp, op);
39807ec681f3Smrg      tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1c, 0x01, 0x00));
39817ec681f3Smrg      active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
39827ec681f3Smrg                             LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 2, 0), ""),
39837ec681f3Smrg                             ctx->i32_0, "");
39847ec681f3Smrg      tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
39857ec681f3Smrg      result = ac_build_alu_op(ctx, result, tmp, op);
39867ec681f3Smrg      tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x18, 0x03, 0x00));
39877ec681f3Smrg      active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
39887ec681f3Smrg                             LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 4, 0), ""),
39897ec681f3Smrg                             ctx->i32_0, "");
39907ec681f3Smrg      tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
39917ec681f3Smrg      result = ac_build_alu_op(ctx, result, tmp, op);
39927ec681f3Smrg      tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x10, 0x07, 0x00));
39937ec681f3Smrg      active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
39947ec681f3Smrg                             LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 8, 0), ""),
39957ec681f3Smrg                             ctx->i32_0, "");
39967ec681f3Smrg      tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
39977ec681f3Smrg      result = ac_build_alu_op(ctx, result, tmp, op);
39987ec681f3Smrg      tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x00, 0x0f, 0x00));
39997ec681f3Smrg      active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
40007ec681f3Smrg                             LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, 0), ""),
40017ec681f3Smrg                             ctx->i32_0, "");
40027ec681f3Smrg      tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
40037ec681f3Smrg      result = ac_build_alu_op(ctx, result, tmp, op);
40047ec681f3Smrg      tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, 0));
40057ec681f3Smrg      active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
40067ec681f3Smrg                             LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 32, 0), ""),
40077ec681f3Smrg                             ctx->i32_0, "");
40087ec681f3Smrg      tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
40097ec681f3Smrg      result = ac_build_alu_op(ctx, result, tmp, op);
40107ec681f3Smrg      return result;
40117ec681f3Smrg   }
40127ec681f3Smrg
40137ec681f3Smrg   if (maxprefix <= 1)
40147ec681f3Smrg      return result;
40157ec681f3Smrg   tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
40167ec681f3Smrg   result = ac_build_alu_op(ctx, result, tmp, op);
40177ec681f3Smrg   if (maxprefix <= 2)
40187ec681f3Smrg      return result;
40197ec681f3Smrg   tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false);
40207ec681f3Smrg   result = ac_build_alu_op(ctx, result, tmp, op);
40217ec681f3Smrg   if (maxprefix <= 3)
40227ec681f3Smrg      return result;
40237ec681f3Smrg   tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false);
40247ec681f3Smrg   result = ac_build_alu_op(ctx, result, tmp, op);
40257ec681f3Smrg   if (maxprefix <= 4)
40267ec681f3Smrg      return result;
40277ec681f3Smrg   tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false);
40287ec681f3Smrg   result = ac_build_alu_op(ctx, result, tmp, op);
40297ec681f3Smrg   if (maxprefix <= 8)
40307ec681f3Smrg      return result;
40317ec681f3Smrg   tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false);
40327ec681f3Smrg   result = ac_build_alu_op(ctx, result, tmp, op);
40337ec681f3Smrg   if (maxprefix <= 16)
40347ec681f3Smrg      return result;
40357ec681f3Smrg
40367ec681f3Smrg   if (ctx->chip_class >= GFX10) {
40377ec681f3Smrg      LLVMValueRef tid = ac_get_thread_id(ctx);
40387ec681f3Smrg      LLVMValueRef active;
40397ec681f3Smrg
40407ec681f3Smrg      tmp = ac_build_permlane16(ctx, result, ~(uint64_t)0, true, false);
40417ec681f3Smrg
40427ec681f3Smrg      active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
40437ec681f3Smrg                             LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, false), ""),
40447ec681f3Smrg                             ctx->i32_0, "");
40457ec681f3Smrg
40467ec681f3Smrg      tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
40477ec681f3Smrg
40487ec681f3Smrg      result = ac_build_alu_op(ctx, result, tmp, op);
40497ec681f3Smrg
40507ec681f3Smrg      if (maxprefix <= 32)
40517ec681f3Smrg         return result;
40527ec681f3Smrg
40537ec681f3Smrg      tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
40547ec681f3Smrg
40557ec681f3Smrg      active = LLVMBuildICmp(ctx->builder, LLVMIntUGE, tid, LLVMConstInt(ctx->i32, 32, false), "");
40567ec681f3Smrg
40577ec681f3Smrg      tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
40587ec681f3Smrg
40597ec681f3Smrg      result = ac_build_alu_op(ctx, result, tmp, op);
40607ec681f3Smrg      return result;
40617ec681f3Smrg   }
40627ec681f3Smrg
40637ec681f3Smrg   tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
40647ec681f3Smrg   result = ac_build_alu_op(ctx, result, tmp, op);
40657ec681f3Smrg   if (maxprefix <= 32)
40667ec681f3Smrg      return result;
40677ec681f3Smrg   tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
40687ec681f3Smrg   result = ac_build_alu_op(ctx, result, tmp, op);
40697ec681f3Smrg   return result;
40707ec681f3Smrg}
40717ec681f3Smrg
40727ec681f3SmrgLLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
40737ec681f3Smrg{
40747ec681f3Smrg   LLVMValueRef result;
40757ec681f3Smrg
40767ec681f3Smrg   if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
40777ec681f3Smrg      LLVMBuilderRef builder = ctx->builder;
40787ec681f3Smrg      src = LLVMBuildZExt(builder, src, ctx->i32, "");
40797ec681f3Smrg      result = ac_build_ballot(ctx, src);
40807ec681f3Smrg      result = ac_build_mbcnt(ctx, result);
40817ec681f3Smrg      result = LLVMBuildAdd(builder, result, src, "");
40827ec681f3Smrg      return result;
40837ec681f3Smrg   }
40847ec681f3Smrg
40857ec681f3Smrg   ac_build_optimization_barrier(ctx, &src, false);
40867ec681f3Smrg
40877ec681f3Smrg   LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
40887ec681f3Smrg   result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
40897ec681f3Smrg                             LLVMTypeOf(identity), "");
40907ec681f3Smrg   result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true);
40917ec681f3Smrg
40927ec681f3Smrg   return ac_build_wwm(ctx, result);
40937ec681f3Smrg}
40947ec681f3Smrg
40957ec681f3SmrgLLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
40967ec681f3Smrg{
40977ec681f3Smrg   LLVMValueRef result;
40987ec681f3Smrg
40997ec681f3Smrg   if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
41007ec681f3Smrg      LLVMBuilderRef builder = ctx->builder;
41017ec681f3Smrg      src = LLVMBuildZExt(builder, src, ctx->i32, "");
41027ec681f3Smrg      result = ac_build_ballot(ctx, src);
41037ec681f3Smrg      result = ac_build_mbcnt(ctx, result);
41047ec681f3Smrg      return result;
41057ec681f3Smrg   }
41067ec681f3Smrg
41077ec681f3Smrg   ac_build_optimization_barrier(ctx, &src, false);
41087ec681f3Smrg
41097ec681f3Smrg   LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
41107ec681f3Smrg   result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
41117ec681f3Smrg                             LLVMTypeOf(identity), "");
41127ec681f3Smrg   result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false);
41137ec681f3Smrg
41147ec681f3Smrg   return ac_build_wwm(ctx, result);
41157ec681f3Smrg}
41167ec681f3Smrg
41177ec681f3SmrgLLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op,
41187ec681f3Smrg                             unsigned cluster_size)
41197ec681f3Smrg{
41207ec681f3Smrg   if (cluster_size == 1)
41217ec681f3Smrg      return src;
41227ec681f3Smrg   ac_build_optimization_barrier(ctx, &src, false);
41237ec681f3Smrg   LLVMValueRef result, swap;
41247ec681f3Smrg   LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
41257ec681f3Smrg   result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
41267ec681f3Smrg                             LLVMTypeOf(identity), "");
41277ec681f3Smrg   swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2);
41287ec681f3Smrg   result = ac_build_alu_op(ctx, result, swap, op);
41297ec681f3Smrg   if (cluster_size == 2)
41307ec681f3Smrg      return ac_build_wwm(ctx, result);
41317ec681f3Smrg
41327ec681f3Smrg   swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1);
41337ec681f3Smrg   result = ac_build_alu_op(ctx, result, swap, op);
41347ec681f3Smrg   if (cluster_size == 4)
41357ec681f3Smrg      return ac_build_wwm(ctx, result);
41367ec681f3Smrg
41377ec681f3Smrg   if (ctx->chip_class >= GFX8)
41387ec681f3Smrg      swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
41397ec681f3Smrg   else
41407ec681f3Smrg      swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
41417ec681f3Smrg   result = ac_build_alu_op(ctx, result, swap, op);
41427ec681f3Smrg   if (cluster_size == 8)
41437ec681f3Smrg      return ac_build_wwm(ctx, result);
41447ec681f3Smrg
41457ec681f3Smrg   if (ctx->chip_class >= GFX8)
41467ec681f3Smrg      swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
41477ec681f3Smrg   else
41487ec681f3Smrg      swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
41497ec681f3Smrg   result = ac_build_alu_op(ctx, result, swap, op);
41507ec681f3Smrg   if (cluster_size == 16)
41517ec681f3Smrg      return ac_build_wwm(ctx, result);
41527ec681f3Smrg
41537ec681f3Smrg   if (ctx->chip_class >= GFX10)
41547ec681f3Smrg      swap = ac_build_permlane16(ctx, result, 0, true, false);
41557ec681f3Smrg   else if (ctx->chip_class >= GFX8 && cluster_size != 32)
41567ec681f3Smrg      swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
41577ec681f3Smrg   else
41587ec681f3Smrg      swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
41597ec681f3Smrg   result = ac_build_alu_op(ctx, result, swap, op);
41607ec681f3Smrg   if (cluster_size == 32)
41617ec681f3Smrg      return ac_build_wwm(ctx, result);
41627ec681f3Smrg
41637ec681f3Smrg   if (ctx->chip_class >= GFX8) {
41647ec681f3Smrg      if (ctx->wave_size == 64) {
41657ec681f3Smrg         if (ctx->chip_class >= GFX10)
41667ec681f3Smrg            swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
41677ec681f3Smrg         else
41687ec681f3Smrg            swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
41697ec681f3Smrg         result = ac_build_alu_op(ctx, result, swap, op);
41707ec681f3Smrg         result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
41717ec681f3Smrg      }
41727ec681f3Smrg
41737ec681f3Smrg      return ac_build_wwm(ctx, result);
41747ec681f3Smrg   } else {
41757ec681f3Smrg      swap = ac_build_readlane(ctx, result, ctx->i32_0);
41767ec681f3Smrg      result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0));
41777ec681f3Smrg      result = ac_build_alu_op(ctx, result, swap, op);
41787ec681f3Smrg      return ac_build_wwm(ctx, result);
41797ec681f3Smrg   }
41807ec681f3Smrg}
41817ec681f3Smrg
41827ec681f3Smrg/**
41837ec681f3Smrg * "Top half" of a scan that reduces per-wave values across an entire
41847ec681f3Smrg * workgroup.
41857ec681f3Smrg *
41867ec681f3Smrg * The source value must be present in the highest lane of the wave, and the
41877ec681f3Smrg * highest lane must be live.
41887ec681f3Smrg */
41897ec681f3Smrgvoid ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
41907ec681f3Smrg{
41917ec681f3Smrg   if (ws->maxwaves <= 1)
41927ec681f3Smrg      return;
41937ec681f3Smrg
41947ec681f3Smrg   const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false);
41957ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
41967ec681f3Smrg   LLVMValueRef tid = ac_get_thread_id(ctx);
41977ec681f3Smrg   LLVMValueRef tmp;
41987ec681f3Smrg
41997ec681f3Smrg   tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, last_lane, "");
42007ec681f3Smrg   ac_build_ifcc(ctx, tmp, 1000);
42017ec681f3Smrg   LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, ""));
42027ec681f3Smrg   ac_build_endif(ctx, 1000);
42037ec681f3Smrg}
42047ec681f3Smrg
42057ec681f3Smrg/**
42067ec681f3Smrg * "Bottom half" of a scan that reduces per-wave values across an entire
42077ec681f3Smrg * workgroup.
42087ec681f3Smrg *
42097ec681f3Smrg * The caller must place a barrier between the top and bottom halves.
42107ec681f3Smrg */
42117ec681f3Smrgvoid ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
42127ec681f3Smrg{
42137ec681f3Smrg   const LLVMTypeRef type = LLVMTypeOf(ws->src);
42147ec681f3Smrg   const LLVMValueRef identity = get_reduction_identity(ctx, ws->op, ac_get_type_size(type));
42157ec681f3Smrg
42167ec681f3Smrg   if (ws->maxwaves <= 1) {
42177ec681f3Smrg      ws->result_reduce = ws->src;
42187ec681f3Smrg      ws->result_inclusive = ws->src;
42197ec681f3Smrg      ws->result_exclusive = identity;
42207ec681f3Smrg      return;
42217ec681f3Smrg   }
42227ec681f3Smrg   assert(ws->maxwaves <= 32);
42237ec681f3Smrg
42247ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
42257ec681f3Smrg   LLVMValueRef tid = ac_get_thread_id(ctx);
42267ec681f3Smrg   LLVMBasicBlockRef bbs[2];
42277ec681f3Smrg   LLVMValueRef phivalues_scan[2];
42287ec681f3Smrg   LLVMValueRef tmp, tmp2;
42297ec681f3Smrg
42307ec681f3Smrg   bbs[0] = LLVMGetInsertBlock(builder);
42317ec681f3Smrg   phivalues_scan[0] = LLVMGetUndef(type);
42327ec681f3Smrg
42337ec681f3Smrg   if (ws->enable_reduce)
42347ec681f3Smrg      tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, "");
42357ec681f3Smrg   else if (ws->enable_inclusive)
42367ec681f3Smrg      tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, "");
42377ec681f3Smrg   else
42387ec681f3Smrg      tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, "");
42397ec681f3Smrg   ac_build_ifcc(ctx, tmp, 1001);
42407ec681f3Smrg   {
42417ec681f3Smrg      tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), "");
42427ec681f3Smrg
42437ec681f3Smrg      ac_build_optimization_barrier(ctx, &tmp, false);
42447ec681f3Smrg
42457ec681f3Smrg      bbs[1] = LLVMGetInsertBlock(builder);
42467ec681f3Smrg      phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true);
42477ec681f3Smrg   }
42487ec681f3Smrg   ac_build_endif(ctx, 1001);
42497ec681f3Smrg
42507ec681f3Smrg   const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs);
42517ec681f3Smrg
42527ec681f3Smrg   if (ws->enable_reduce) {
42537ec681f3Smrg      tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, "");
42547ec681f3Smrg      ws->result_reduce = ac_build_readlane(ctx, scan, tmp);
42557ec681f3Smrg   }
42567ec681f3Smrg   if (ws->enable_inclusive)
42577ec681f3Smrg      ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx);
42587ec681f3Smrg   if (ws->enable_exclusive) {
42597ec681f3Smrg      tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, "");
42607ec681f3Smrg      tmp = ac_build_readlane(ctx, scan, tmp);
42617ec681f3Smrg      tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, "");
42627ec681f3Smrg      ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, "");
42637ec681f3Smrg   }
42647ec681f3Smrg}
42657ec681f3Smrg
/**
 * Scan of a per-wave value across an entire workgroup.
 *
 * Emits the top half, an s_barrier, and then the bottom half, in that order.
 *
 * Unlike ac_build_inclusive_scan, the caller \em must ensure that all threads
 * of the workgroup are live. (This requirement cannot easily be relaxed in a
 * useful manner because of the barrier in the algorithm.)
 */
void ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
{
   /* Publish each wave's value. */
   ac_build_wg_wavescan_top(ctx, ws);

   /* The barrier separates the two halves. */
   ac_build_s_barrier(ctx);

   /* Combine the published values into the requested results. */
   ac_build_wg_wavescan_bottom(ctx, ws);
}
42817ec681f3Smrg
42827ec681f3Smrg/**
42837ec681f3Smrg * "Top half" of a scan that reduces per-thread values across an entire
42847ec681f3Smrg * workgroup.
42857ec681f3Smrg *
42867ec681f3Smrg * All lanes must be active when this code runs.
42877ec681f3Smrg */
42887ec681f3Smrgvoid ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
42897ec681f3Smrg{
42907ec681f3Smrg   if (ws->enable_exclusive) {
42917ec681f3Smrg      ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op);
42927ec681f3Smrg      if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd)
42937ec681f3Smrg         ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, "");
42947ec681f3Smrg      ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op);
42957ec681f3Smrg   } else {
42967ec681f3Smrg      ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op);
42977ec681f3Smrg   }
42987ec681f3Smrg
42997ec681f3Smrg   bool enable_inclusive = ws->enable_inclusive;
43007ec681f3Smrg   bool enable_exclusive = ws->enable_exclusive;
43017ec681f3Smrg   ws->enable_inclusive = false;
43027ec681f3Smrg   ws->enable_exclusive = ws->enable_exclusive || enable_inclusive;
43037ec681f3Smrg   ac_build_wg_wavescan_top(ctx, ws);
43047ec681f3Smrg   ws->enable_inclusive = enable_inclusive;
43057ec681f3Smrg   ws->enable_exclusive = enable_exclusive;
43067ec681f3Smrg}
43077ec681f3Smrg
43087ec681f3Smrg/**
43097ec681f3Smrg * "Bottom half" of a scan that reduces per-thread values across an entire
43107ec681f3Smrg * workgroup.
43117ec681f3Smrg *
43127ec681f3Smrg * The caller must place a barrier between the top and bottom halves.
43137ec681f3Smrg */
43147ec681f3Smrgvoid ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
43157ec681f3Smrg{
43167ec681f3Smrg   bool enable_inclusive = ws->enable_inclusive;
43177ec681f3Smrg   bool enable_exclusive = ws->enable_exclusive;
43187ec681f3Smrg   ws->enable_inclusive = false;
43197ec681f3Smrg   ws->enable_exclusive = ws->enable_exclusive || enable_inclusive;
43207ec681f3Smrg   ac_build_wg_wavescan_bottom(ctx, ws);
43217ec681f3Smrg   ws->enable_inclusive = enable_inclusive;
43227ec681f3Smrg   ws->enable_exclusive = enable_exclusive;
43237ec681f3Smrg
43247ec681f3Smrg   /* ws->result_reduce is already the correct value */
43257ec681f3Smrg   if (ws->enable_inclusive)
43267ec681f3Smrg      ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op);
43277ec681f3Smrg   if (ws->enable_exclusive)
43287ec681f3Smrg      ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op);
43297ec681f3Smrg}
43307ec681f3Smrg
/**
 * Scan of per-thread values across an entire workgroup.
 *
 * Emits the top half, an s_barrier, and then the bottom half, in that order.
 *
 * The caller must ensure that all lanes are active when this code runs
 * (WWM is insufficient!), because there is an implied barrier.
 */
void ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
{
   /* Scan within each wave and publish the per-wave value. */
   ac_build_wg_scan_top(ctx, ws);

   /* The barrier separates the two halves. */
   ac_build_s_barrier(ctx);

   /* Combine across waves and with the per-thread values. */
   ac_build_wg_scan_bottom(ctx, ws);
}
43437ec681f3Smrg
43447ec681f3SmrgLLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0,
43457ec681f3Smrg                                   unsigned lane1, unsigned lane2, unsigned lane3)
43467ec681f3Smrg{
43477ec681f3Smrg   unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3);
43487ec681f3Smrg   if (ctx->chip_class >= GFX8) {
43497ec681f3Smrg      return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
43507ec681f3Smrg   } else {
43517ec681f3Smrg      return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
43527ec681f3Smrg   }
43537ec681f3Smrg}
43547ec681f3Smrg
43557ec681f3SmrgLLVMValueRef ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index)
43567ec681f3Smrg{
43577ec681f3Smrg   LLVMTypeRef type = LLVMTypeOf(src);
43587ec681f3Smrg   LLVMValueRef result;
43597ec681f3Smrg
43607ec681f3Smrg   index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
43617ec681f3Smrg   src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
43627ec681f3Smrg
43637ec681f3Smrg   result =
43647ec681f3Smrg      ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, (LLVMValueRef[]){index, src}, 2,
43657ec681f3Smrg                         AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
43667ec681f3Smrg   return LLVMBuildTrunc(ctx->builder, result, type, "");
43677ec681f3Smrg}
43687ec681f3Smrg
43697ec681f3SmrgLLVMValueRef ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
43707ec681f3Smrg{
43717ec681f3Smrg   LLVMTypeRef type;
43727ec681f3Smrg   char *intr;
43737ec681f3Smrg
43747ec681f3Smrg   if (bitsize == 16) {
43757ec681f3Smrg      intr = "llvm.amdgcn.frexp.exp.i16.f16";
43767ec681f3Smrg      type = ctx->i16;
43777ec681f3Smrg   } else if (bitsize == 32) {
43787ec681f3Smrg      intr = "llvm.amdgcn.frexp.exp.i32.f32";
43797ec681f3Smrg      type = ctx->i32;
43807ec681f3Smrg   } else {
43817ec681f3Smrg      intr = "llvm.amdgcn.frexp.exp.i32.f64";
43827ec681f3Smrg      type = ctx->i32;
43837ec681f3Smrg   }
43847ec681f3Smrg
43857ec681f3Smrg   LLVMValueRef params[] = {
43867ec681f3Smrg      src0,
43877ec681f3Smrg   };
43887ec681f3Smrg   return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
43897ec681f3Smrg}
43907ec681f3SmrgLLVMValueRef ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
43917ec681f3Smrg{
43927ec681f3Smrg   LLVMTypeRef type;
43937ec681f3Smrg   char *intr;
43947ec681f3Smrg
43957ec681f3Smrg   if (bitsize == 16) {
43967ec681f3Smrg      intr = "llvm.amdgcn.frexp.mant.f16";
43977ec681f3Smrg      type = ctx->f16;
43987ec681f3Smrg   } else if (bitsize == 32) {
43997ec681f3Smrg      intr = "llvm.amdgcn.frexp.mant.f32";
44007ec681f3Smrg      type = ctx->f32;
44017ec681f3Smrg   } else {
44027ec681f3Smrg      intr = "llvm.amdgcn.frexp.mant.f64";
44037ec681f3Smrg      type = ctx->f64;
44047ec681f3Smrg   }
44057ec681f3Smrg
44067ec681f3Smrg   LLVMValueRef params[] = {
44077ec681f3Smrg      src0,
44087ec681f3Smrg   };
44097ec681f3Smrg   return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
44107ec681f3Smrg}
44117ec681f3Smrg
44127ec681f3SmrgLLVMValueRef ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
44137ec681f3Smrg{
44147ec681f3Smrg   LLVMTypeRef type;
44157ec681f3Smrg   char *intr;
44167ec681f3Smrg
44177ec681f3Smrg   if (bitsize == 16) {
44187ec681f3Smrg      intr = "llvm.canonicalize.f16";
44197ec681f3Smrg      type = ctx->f16;
44207ec681f3Smrg   } else if (bitsize == 32) {
44217ec681f3Smrg      intr = "llvm.canonicalize.f32";
44227ec681f3Smrg      type = ctx->f32;
44237ec681f3Smrg   } else {
44247ec681f3Smrg      intr = "llvm.canonicalize.f64";
44257ec681f3Smrg      type = ctx->f64;
44267ec681f3Smrg   }
44277ec681f3Smrg
44287ec681f3Smrg   LLVMValueRef params[] = {
44297ec681f3Smrg      src0,
44307ec681f3Smrg   };
44317ec681f3Smrg   return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
44327ec681f3Smrg}
44337ec681f3Smrg
44347ec681f3Smrg/*
44357ec681f3Smrg * this takes an I,J coordinate pair,
44367ec681f3Smrg * and works out the X and Y derivatives.
44377ec681f3Smrg * it returns DDX(I), DDX(J), DDY(I), DDY(J).
44387ec681f3Smrg */
44397ec681f3SmrgLLVMValueRef ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij)
44407ec681f3Smrg{
44417ec681f3Smrg   LLVMValueRef result[4], a;
44427ec681f3Smrg   unsigned i;
44437ec681f3Smrg
44447ec681f3Smrg   for (i = 0; i < 2; i++) {
44457ec681f3Smrg      a = LLVMBuildExtractElement(ctx->builder, interp_ij, LLVMConstInt(ctx->i32, i, false), "");
44467ec681f3Smrg      result[i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, a);
44477ec681f3Smrg      result[2 + i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, a);
44487ec681f3Smrg   }
44497ec681f3Smrg   return ac_build_gather_values(ctx, result, 4);
44507ec681f3Smrg}
44517ec681f3Smrg
44527ec681f3SmrgLLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
44537ec681f3Smrg{
44547ec681f3Smrg   LLVMValueRef result;
44557ec681f3Smrg
44567ec681f3Smrg   if (LLVM_VERSION_MAJOR >= 13) {
44577ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.amdgcn.live.mask", ctx->i1, NULL, 0,
44587ec681f3Smrg                                  AC_FUNC_ATTR_READONLY | AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
44597ec681f3Smrg   } else {
44607ec681f3Smrg      result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0,
44617ec681f3Smrg                                  AC_FUNC_ATTR_READNONE);
44627ec681f3Smrg   }
44637ec681f3Smrg   return LLVMBuildNot(ctx->builder, result, "");
44647ec681f3Smrg}
44657ec681f3Smrg
44667ec681f3SmrgLLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx)
44677ec681f3Smrg{
44687ec681f3Smrg   if (!ctx->postponed_kill)
44697ec681f3Smrg      return ac_build_load_helper_invocation(ctx);
44707ec681f3Smrg
44717ec681f3Smrg   /* postponed_kill should be NULL on LLVM 13+ */
44727ec681f3Smrg   assert(LLVM_VERSION_MAJOR < 13);
44737ec681f3Smrg
44747ec681f3Smrg   /* !(exact && postponed) */
44757ec681f3Smrg   LLVMValueRef exact =
44767ec681f3Smrg      ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0, AC_FUNC_ATTR_READNONE);
44777ec681f3Smrg
44787ec681f3Smrg   LLVMValueRef postponed = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, "");
44797ec681f3Smrg   return LLVMBuildNot(ctx->builder, LLVMBuildAnd(ctx->builder, exact, postponed, ""), "");
44807ec681f3Smrg}
44817ec681f3Smrg
44827ec681f3SmrgLLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args,
44837ec681f3Smrg                           unsigned num_args)
44847ec681f3Smrg{
44857ec681f3Smrg   LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, "");
44867ec681f3Smrg   LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func));
44877ec681f3Smrg   return ret;
44887ec681f3Smrg}
44897ec681f3Smrg
44907ec681f3Smrgvoid ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil,
44917ec681f3Smrg                     LLVMValueRef samplemask, struct ac_export_args *args)
44927ec681f3Smrg{
44937ec681f3Smrg   unsigned mask = 0;
44947ec681f3Smrg   unsigned format = ac_get_spi_shader_z_format(depth != NULL, stencil != NULL, samplemask != NULL);
44957ec681f3Smrg
44967ec681f3Smrg   assert(depth || stencil || samplemask);
44977ec681f3Smrg
44987ec681f3Smrg   memset(args, 0, sizeof(*args));
44997ec681f3Smrg
45007ec681f3Smrg   args->valid_mask = 1; /* whether the EXEC mask is valid */
45017ec681f3Smrg   args->done = 1;       /* DONE bit */
45027ec681f3Smrg
45037ec681f3Smrg   /* Specify the target we are exporting */
45047ec681f3Smrg   args->target = V_008DFC_SQ_EXP_MRTZ;
45057ec681f3Smrg
45067ec681f3Smrg   args->compr = 0;                       /* COMP flag */
45077ec681f3Smrg   args->out[0] = LLVMGetUndef(ctx->f32); /* R, depth */
45087ec681f3Smrg   args->out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
45097ec681f3Smrg   args->out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */
45107ec681f3Smrg   args->out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
45117ec681f3Smrg
45127ec681f3Smrg   if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
45137ec681f3Smrg      assert(!depth);
45147ec681f3Smrg      args->compr = 1; /* COMPR flag */
45157ec681f3Smrg
45167ec681f3Smrg      if (stencil) {
45177ec681f3Smrg         /* Stencil should be in X[23:16]. */
45187ec681f3Smrg         stencil = ac_to_integer(ctx, stencil);
45197ec681f3Smrg         stencil = LLVMBuildShl(ctx->builder, stencil, LLVMConstInt(ctx->i32, 16, 0), "");
45207ec681f3Smrg         args->out[0] = ac_to_float(ctx, stencil);
45217ec681f3Smrg         mask |= 0x3;
45227ec681f3Smrg      }
45237ec681f3Smrg      if (samplemask) {
45247ec681f3Smrg         /* SampleMask should be in Y[15:0]. */
45257ec681f3Smrg         args->out[1] = samplemask;
45267ec681f3Smrg         mask |= 0xc;
45277ec681f3Smrg      }
45287ec681f3Smrg   } else {
45297ec681f3Smrg      if (depth) {
45307ec681f3Smrg         args->out[0] = depth;
45317ec681f3Smrg         mask |= 0x1;
45327ec681f3Smrg      }
45337ec681f3Smrg      if (stencil) {
45347ec681f3Smrg         args->out[1] = stencil;
45357ec681f3Smrg         mask |= 0x2;
45367ec681f3Smrg      }
45377ec681f3Smrg      if (samplemask) {
45387ec681f3Smrg         args->out[2] = samplemask;
45397ec681f3Smrg         mask |= 0x4;
45407ec681f3Smrg      }
45417ec681f3Smrg   }
45427ec681f3Smrg
45437ec681f3Smrg   /* GFX6 (except OLAND and HAINAN) has a bug that it only looks
45447ec681f3Smrg    * at the X writemask component. */
45457ec681f3Smrg   if (ctx->chip_class == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN)
45467ec681f3Smrg      mask |= 0x1;
45477ec681f3Smrg
45487ec681f3Smrg   /* Specify which components to enable */
45497ec681f3Smrg   args->enabled_channels = mask;
45507ec681f3Smrg}
45517ec681f3Smrg
45527ec681f3Smrg/* Send GS Alloc Req message from the first wave of the group to SPI.
45537ec681f3Smrg * Message payload is:
45547ec681f3Smrg * - bits 0..10: vertices in group
45557ec681f3Smrg * - bits 12..22: primitives in group
45567ec681f3Smrg */
45577ec681f3Smrgvoid ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id,
45587ec681f3Smrg                                   LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt)
45597ec681f3Smrg{
45607ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
45617ec681f3Smrg   LLVMValueRef tmp;
45627ec681f3Smrg   bool export_dummy_prim = false;
45637ec681f3Smrg
45647ec681f3Smrg   /* HW workaround for a GPU hang with 100% culling.
45657ec681f3Smrg    * We always have to export at least 1 primitive.
45667ec681f3Smrg    * Export a degenerate triangle using vertex 0 for all 3 vertices.
45677ec681f3Smrg    */
45687ec681f3Smrg   if (prim_cnt == ctx->i32_0 && ctx->chip_class == GFX10) {
45697ec681f3Smrg      assert(vtx_cnt == ctx->i32_0);
45707ec681f3Smrg      prim_cnt = ctx->i32_1;
45717ec681f3Smrg      vtx_cnt = ctx->i32_1;
45727ec681f3Smrg      export_dummy_prim = true;
45737ec681f3Smrg   }
45747ec681f3Smrg
45757ec681f3Smrg   ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020);
45767ec681f3Smrg
45777ec681f3Smrg   tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->i32, 12, false), "");
45787ec681f3Smrg   tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
45797ec681f3Smrg   ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp);
45807ec681f3Smrg
45817ec681f3Smrg   if (export_dummy_prim) {
45827ec681f3Smrg      struct ac_ngg_prim prim = {0};
45837ec681f3Smrg      /* The vertex indices are 0,0,0. */
45847ec681f3Smrg      prim.passthrough = ctx->i32_0;
45857ec681f3Smrg
45867ec681f3Smrg      struct ac_export_args pos = {0};
45877ec681f3Smrg      /* The hw culls primitives with NaN. */
45887ec681f3Smrg      pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = LLVMConstReal(ctx->f32, NAN);
45897ec681f3Smrg      pos.target = V_008DFC_SQ_EXP_POS;
45907ec681f3Smrg      pos.enabled_channels = 0xf;
45917ec681f3Smrg      pos.done = true;
45927ec681f3Smrg
45937ec681f3Smrg      ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx), ctx->i32_0, ""),
45947ec681f3Smrg                    5021);
45957ec681f3Smrg      ac_build_export_prim(ctx, &prim);
45967ec681f3Smrg      ac_build_export(ctx, &pos);
45977ec681f3Smrg      ac_build_endif(ctx, 5021);
45987ec681f3Smrg   }
45997ec681f3Smrg
46007ec681f3Smrg   ac_build_endif(ctx, 5020);
46017ec681f3Smrg}
46027ec681f3Smrg
46037ec681f3Smrg
46047ec681f3SmrgLLVMValueRef ac_pack_edgeflags_for_export(struct ac_llvm_context *ctx,
46057ec681f3Smrg                                          const struct ac_shader_args *args)
46067ec681f3Smrg{
46077ec681f3Smrg   /* Use the following trick to extract the edge flags:
46087ec681f3Smrg    *   extracted = v_and_b32 gs_invocation_id, 0x700 ; get edge flags at bits 8, 9, 10
46097ec681f3Smrg    *   shifted = v_mul_u32_u24 extracted, 0x80402u   ; shift the bits: 8->9, 9->19, 10->29
46107ec681f3Smrg    *   result = v_and_b32 shifted, 0x20080200        ; remove garbage
46117ec681f3Smrg    */
46127ec681f3Smrg   LLVMValueRef tmp = LLVMBuildAnd(ctx->builder,
46137ec681f3Smrg                                   ac_get_arg(ctx, args->gs_invocation_id),
46147ec681f3Smrg                                   LLVMConstInt(ctx->i32, 0x700, 0), "");
46157ec681f3Smrg   tmp = LLVMBuildMul(ctx->builder, tmp, LLVMConstInt(ctx->i32, 0x80402u, 0), "");
46167ec681f3Smrg   return LLVMBuildAnd(ctx->builder, tmp, LLVMConstInt(ctx->i32, 0x20080200, 0), "");
46177ec681f3Smrg}
46187ec681f3Smrg
46197ec681f3SmrgLLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim)
46207ec681f3Smrg{
46217ec681f3Smrg   /* The prim export format is:
46227ec681f3Smrg    *  - bits 0..8: index 0
46237ec681f3Smrg    *  - bit 9: edge flag 0
46247ec681f3Smrg    *  - bits 10..18: index 1
46257ec681f3Smrg    *  - bit 19: edge flag 1
46267ec681f3Smrg    *  - bits 20..28: index 2
46277ec681f3Smrg    *  - bit 29: edge flag 2
46287ec681f3Smrg    *  - bit 31: null primitive (skip)
46297ec681f3Smrg    */
46307ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
46317ec681f3Smrg   LLVMValueRef tmp = LLVMBuildZExt(builder, prim->isnull, ctx->i32, "");
46327ec681f3Smrg   LLVMValueRef result = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 31, false), "");
46337ec681f3Smrg   result = LLVMBuildOr(ctx->builder, result, prim->edgeflags, "");
46347ec681f3Smrg
46357ec681f3Smrg   for (unsigned i = 0; i < prim->num_vertices; ++i) {
46367ec681f3Smrg      tmp = LLVMBuildShl(builder, prim->index[i], LLVMConstInt(ctx->i32, 10 * i, false), "");
46377ec681f3Smrg      result = LLVMBuildOr(builder, result, tmp, "");
46387ec681f3Smrg   }
46397ec681f3Smrg   return result;
46407ec681f3Smrg}
46417ec681f3Smrg
46427ec681f3Smrgvoid ac_build_export_prim(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim)
46437ec681f3Smrg{
46447ec681f3Smrg   struct ac_export_args args;
46457ec681f3Smrg
46467ec681f3Smrg   if (prim->passthrough) {
46477ec681f3Smrg      args.out[0] = prim->passthrough;
46487ec681f3Smrg   } else {
46497ec681f3Smrg      args.out[0] = ac_pack_prim_export(ctx, prim);
46507ec681f3Smrg   }
46517ec681f3Smrg
46527ec681f3Smrg   args.out[0] = LLVMBuildBitCast(ctx->builder, args.out[0], ctx->f32, "");
46537ec681f3Smrg   args.out[1] = LLVMGetUndef(ctx->f32);
46547ec681f3Smrg   args.out[2] = LLVMGetUndef(ctx->f32);
46557ec681f3Smrg   args.out[3] = LLVMGetUndef(ctx->f32);
46567ec681f3Smrg
46577ec681f3Smrg   args.target = V_008DFC_SQ_EXP_PRIM;
46587ec681f3Smrg   args.enabled_channels = 1;
46597ec681f3Smrg   args.done = true;
46607ec681f3Smrg   args.valid_mask = false;
46617ec681f3Smrg   args.compr = false;
46627ec681f3Smrg
46637ec681f3Smrg   ac_build_export(ctx, &args);
46647ec681f3Smrg}
46657ec681f3Smrg
46667ec681f3Smrgstatic LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx)
46677ec681f3Smrg{
46687ec681f3Smrg   if (type == AC_ARG_FLOAT) {
46697ec681f3Smrg      return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size);
46707ec681f3Smrg   } else if (type == AC_ARG_INT) {
46717ec681f3Smrg      return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size);
46727ec681f3Smrg   } else {
46737ec681f3Smrg      LLVMTypeRef ptr_type;
46747ec681f3Smrg      switch (type) {
46757ec681f3Smrg      case AC_ARG_CONST_PTR:
46767ec681f3Smrg         ptr_type = ctx->i8;
46777ec681f3Smrg         break;
46787ec681f3Smrg      case AC_ARG_CONST_FLOAT_PTR:
46797ec681f3Smrg         ptr_type = ctx->f32;
46807ec681f3Smrg         break;
46817ec681f3Smrg      case AC_ARG_CONST_PTR_PTR:
46827ec681f3Smrg         ptr_type = ac_array_in_const32_addr_space(ctx->i8);
46837ec681f3Smrg         break;
46847ec681f3Smrg      case AC_ARG_CONST_DESC_PTR:
46857ec681f3Smrg         ptr_type = ctx->v4i32;
46867ec681f3Smrg         break;
46877ec681f3Smrg      case AC_ARG_CONST_IMAGE_PTR:
46887ec681f3Smrg         ptr_type = ctx->v8i32;
46897ec681f3Smrg         break;
46907ec681f3Smrg      default:
46917ec681f3Smrg         unreachable("unknown arg type");
46927ec681f3Smrg      }
46937ec681f3Smrg      if (size == 1) {
46947ec681f3Smrg         return ac_array_in_const32_addr_space(ptr_type);
46957ec681f3Smrg      } else {
46967ec681f3Smrg         assert(size == 2);
46977ec681f3Smrg         return ac_array_in_const_addr_space(ptr_type);
46987ec681f3Smrg      }
46997ec681f3Smrg   }
47007ec681f3Smrg}
47017ec681f3Smrg
47027ec681f3SmrgLLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
47037ec681f3Smrg                           enum ac_llvm_calling_convention convention, const char *name,
47047ec681f3Smrg                           LLVMTypeRef ret_type, LLVMModuleRef module)
47057ec681f3Smrg{
47067ec681f3Smrg   LLVMTypeRef arg_types[AC_MAX_ARGS];
47077ec681f3Smrg
47087ec681f3Smrg   for (unsigned i = 0; i < args->arg_count; i++) {
47097ec681f3Smrg      arg_types[i] = arg_llvm_type(args->args[i].type, args->args[i].size, ctx);
47107ec681f3Smrg   }
47117ec681f3Smrg
47127ec681f3Smrg   LLVMTypeRef main_function_type = LLVMFunctionType(ret_type, arg_types, args->arg_count, 0);
47137ec681f3Smrg
47147ec681f3Smrg   LLVMValueRef main_function = LLVMAddFunction(module, name, main_function_type);
47157ec681f3Smrg   LLVMBasicBlockRef main_function_body =
47167ec681f3Smrg      LLVMAppendBasicBlockInContext(ctx->context, main_function, "main_body");
47177ec681f3Smrg   LLVMPositionBuilderAtEnd(ctx->builder, main_function_body);
47187ec681f3Smrg
47197ec681f3Smrg   LLVMSetFunctionCallConv(main_function, convention);
47207ec681f3Smrg   for (unsigned i = 0; i < args->arg_count; ++i) {
47217ec681f3Smrg      LLVMValueRef P = LLVMGetParam(main_function, i);
47227ec681f3Smrg
47237ec681f3Smrg      if (args->args[i].file != AC_ARG_SGPR)
47247ec681f3Smrg         continue;
47257ec681f3Smrg
47267ec681f3Smrg      ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_INREG);
47277ec681f3Smrg
47287ec681f3Smrg      if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
47297ec681f3Smrg         ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
47307ec681f3Smrg         ac_add_attr_dereferenceable(P, UINT64_MAX);
47317ec681f3Smrg         ac_add_attr_alignment(P, 4);
47327ec681f3Smrg      }
47337ec681f3Smrg   }
47347ec681f3Smrg
47357ec681f3Smrg   ctx->main_function = main_function;
47367ec681f3Smrg
47377ec681f3Smrg   /* Enable denormals for FP16 and FP64: */
47387ec681f3Smrg   LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math", "ieee,ieee");
47397ec681f3Smrg   /* Disable denormals for FP32: */
47407ec681f3Smrg   LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32",
47417ec681f3Smrg                                      "preserve-sign,preserve-sign");
47427ec681f3Smrg   return main_function;
47437ec681f3Smrg}
47447ec681f3Smrg
47457ec681f3Smrgvoid ac_build_s_endpgm(struct ac_llvm_context *ctx)
47467ec681f3Smrg{
47477ec681f3Smrg   LLVMTypeRef calltype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
47487ec681f3Smrg   LLVMValueRef code = LLVMConstInlineAsm(calltype, "s_endpgm", "", true, false);
47497ec681f3Smrg   LLVMBuildCall(ctx->builder, code, NULL, 0, "");
47507ec681f3Smrg}
47517ec681f3Smrg
47527ec681f3Smrg/**
47537ec681f3Smrg * Convert triangle strip indices to triangle indices. This is used to decompose
47547ec681f3Smrg * triangle strips into triangles.
47557ec681f3Smrg */
47567ec681f3Smrgvoid ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LLVMValueRef is_odd,
47577ec681f3Smrg                                                 LLVMValueRef flatshade_first,
47587ec681f3Smrg                                                 LLVMValueRef index[3])
47597ec681f3Smrg{
47607ec681f3Smrg   LLVMBuilderRef builder = ctx->builder;
47617ec681f3Smrg   LLVMValueRef out[3];
47627ec681f3Smrg
47637ec681f3Smrg   /* We need to change the vertex order for odd triangles to get correct
47647ec681f3Smrg    * front/back facing by swapping 2 vertex indices, but we also have to
47657ec681f3Smrg    * keep the provoking vertex in the same place.
47667ec681f3Smrg    *
47677ec681f3Smrg    * If the first vertex is provoking, swap index 1 and 2.
47687ec681f3Smrg    * If the last vertex is provoking, swap index 0 and 1.
47697ec681f3Smrg    */
47707ec681f3Smrg   out[0] = LLVMBuildSelect(builder, flatshade_first, index[0],
47717ec681f3Smrg                            LLVMBuildSelect(builder, is_odd, index[1], index[0], ""), "");
47727ec681f3Smrg   out[1] = LLVMBuildSelect(builder, flatshade_first,
47737ec681f3Smrg                            LLVMBuildSelect(builder, is_odd, index[2], index[1], ""),
47747ec681f3Smrg                            LLVMBuildSelect(builder, is_odd, index[0], index[1], ""), "");
47757ec681f3Smrg   out[2] = LLVMBuildSelect(builder, flatshade_first,
47767ec681f3Smrg                            LLVMBuildSelect(builder, is_odd, index[1], index[2], ""), index[2], "");
47777ec681f3Smrg   memcpy(index, out, sizeof(out));
47787ec681f3Smrg}
4779