/*
 * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Rob Clark <robclark@freedesktop.org> 25b8e80941Smrg */ 26b8e80941Smrg 27b8e80941Smrg#include "util/u_math.h" 28b8e80941Smrg 29b8e80941Smrg#include "ir3_compiler.h" 30b8e80941Smrg#include "ir3_context.h" 31b8e80941Smrg#include "ir3_image.h" 32b8e80941Smrg#include "ir3_shader.h" 33b8e80941Smrg#include "ir3_nir.h" 34b8e80941Smrg 35b8e80941Smrgstruct ir3_context * 36b8e80941Smrgir3_context_init(struct ir3_compiler *compiler, 37b8e80941Smrg struct ir3_shader_variant *so) 38b8e80941Smrg{ 39b8e80941Smrg struct ir3_context *ctx = rzalloc(NULL, struct ir3_context); 40b8e80941Smrg 41b8e80941Smrg if (compiler->gpu_id >= 400) { 42b8e80941Smrg if (so->type == MESA_SHADER_VERTEX) { 43b8e80941Smrg ctx->astc_srgb = so->key.vastc_srgb; 44b8e80941Smrg } else if (so->type == MESA_SHADER_FRAGMENT) { 45b8e80941Smrg ctx->astc_srgb = so->key.fastc_srgb; 46b8e80941Smrg } 47b8e80941Smrg 48b8e80941Smrg } else { 49b8e80941Smrg if (so->type == MESA_SHADER_VERTEX) { 50b8e80941Smrg ctx->samples = so->key.vsamples; 51b8e80941Smrg } else if (so->type == MESA_SHADER_FRAGMENT) { 52b8e80941Smrg ctx->samples = so->key.fsamples; 53b8e80941Smrg } 54b8e80941Smrg } 55b8e80941Smrg 56b8e80941Smrg if (compiler->gpu_id >= 600) { 57b8e80941Smrg ctx->funcs = &ir3_a6xx_funcs; 58b8e80941Smrg } else if (compiler->gpu_id >= 400) { 59b8e80941Smrg ctx->funcs = &ir3_a4xx_funcs; 60b8e80941Smrg } 61b8e80941Smrg 62b8e80941Smrg ctx->compiler = compiler; 63b8e80941Smrg ctx->so = so; 64b8e80941Smrg ctx->def_ht = _mesa_hash_table_create(ctx, 65b8e80941Smrg _mesa_hash_pointer, _mesa_key_pointer_equal); 66b8e80941Smrg ctx->block_ht = _mesa_hash_table_create(ctx, 67b8e80941Smrg _mesa_hash_pointer, _mesa_key_pointer_equal); 68b8e80941Smrg 69b8e80941Smrg /* TODO: maybe generate some sort of bitmask of what key 70b8e80941Smrg * lowers vs what shader has (ie. no need to lower 71b8e80941Smrg * texture clamp lowering if no texture sample instrs).. 
72b8e80941Smrg * although should be done further up the stack to avoid 73b8e80941Smrg * creating duplicate variants.. 74b8e80941Smrg */ 75b8e80941Smrg 76b8e80941Smrg if (ir3_key_lowers_nir(&so->key)) { 77b8e80941Smrg nir_shader *s = nir_shader_clone(ctx, so->shader->nir); 78b8e80941Smrg ctx->s = ir3_optimize_nir(so->shader, s, &so->key); 79b8e80941Smrg } else { 80b8e80941Smrg /* fast-path for shader key that lowers nothing in NIR: */ 81b8e80941Smrg ctx->s = nir_shader_clone(ctx, so->shader->nir); 82b8e80941Smrg } 83b8e80941Smrg 84b8e80941Smrg /* this needs to be the last pass run, so do this here instead of 85b8e80941Smrg * in ir3_optimize_nir(): 86b8e80941Smrg */ 87b8e80941Smrg NIR_PASS_V(ctx->s, nir_lower_bool_to_int32); 88b8e80941Smrg NIR_PASS_V(ctx->s, nir_lower_locals_to_regs); 89b8e80941Smrg NIR_PASS_V(ctx->s, nir_convert_from_ssa, true); 90b8e80941Smrg 91b8e80941Smrg if (ir3_shader_debug & IR3_DBG_DISASM) { 92b8e80941Smrg DBG("dump nir%dv%d: type=%d, k={cts=%u,hp=%u}", 93b8e80941Smrg so->shader->id, so->id, so->type, 94b8e80941Smrg so->key.color_two_side, so->key.half_precision); 95b8e80941Smrg nir_print_shader(ctx->s, stdout); 96b8e80941Smrg } 97b8e80941Smrg 98b8e80941Smrg if (shader_debug_enabled(so->type)) { 99b8e80941Smrg fprintf(stderr, "NIR (final form) for %s shader:\n", 100b8e80941Smrg _mesa_shader_stage_to_string(so->type)); 101b8e80941Smrg nir_print_shader(ctx->s, stderr); 102b8e80941Smrg } 103b8e80941Smrg 104b8e80941Smrg ir3_nir_scan_driver_consts(ctx->s, &so->const_layout); 105b8e80941Smrg 106b8e80941Smrg so->num_uniforms = ctx->s->num_uniforms; 107b8e80941Smrg so->num_ubos = ctx->s->info.num_ubos; 108b8e80941Smrg 109b8e80941Smrg ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures); 110b8e80941Smrg 111b8e80941Smrg /* Layout of constant registers, each section aligned to vec4. Note 112b8e80941Smrg * that pointer size (ubo, etc) changes depending on generation. 
113b8e80941Smrg * 114b8e80941Smrg * user consts 115b8e80941Smrg * UBO addresses 116b8e80941Smrg * SSBO sizes 117b8e80941Smrg * if (vertex shader) { 118b8e80941Smrg * driver params (IR3_DP_*) 119b8e80941Smrg * if (stream_output.num_outputs > 0) 120b8e80941Smrg * stream-out addresses 121b8e80941Smrg * } 122b8e80941Smrg * immediates 123b8e80941Smrg * 124b8e80941Smrg * Immediates go last mostly because they are inserted in the CP pass 125b8e80941Smrg * after the nir -> ir3 frontend. 126b8e80941Smrg * 127b8e80941Smrg * Note UBO size in bytes should be aligned to vec4 128b8e80941Smrg */ 129b8e80941Smrg debug_assert((ctx->so->shader->ubo_state.size % 16) == 0); 130b8e80941Smrg unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4); 131b8e80941Smrg unsigned ptrsz = ir3_pointer_size(ctx); 132b8e80941Smrg 133b8e80941Smrg memset(&so->constbase, ~0, sizeof(so->constbase)); 134b8e80941Smrg 135b8e80941Smrg if (so->num_ubos > 0) { 136b8e80941Smrg so->constbase.ubo = constoff; 137b8e80941Smrg constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4; 138b8e80941Smrg } 139b8e80941Smrg 140b8e80941Smrg if (so->const_layout.ssbo_size.count > 0) { 141b8e80941Smrg unsigned cnt = so->const_layout.ssbo_size.count; 142b8e80941Smrg so->constbase.ssbo_sizes = constoff; 143b8e80941Smrg constoff += align(cnt, 4) / 4; 144b8e80941Smrg } 145b8e80941Smrg 146b8e80941Smrg if (so->const_layout.image_dims.count > 0) { 147b8e80941Smrg unsigned cnt = so->const_layout.image_dims.count; 148b8e80941Smrg so->constbase.image_dims = constoff; 149b8e80941Smrg constoff += align(cnt, 4) / 4; 150b8e80941Smrg } 151b8e80941Smrg 152b8e80941Smrg unsigned num_driver_params = 0; 153b8e80941Smrg if (so->type == MESA_SHADER_VERTEX) { 154b8e80941Smrg num_driver_params = IR3_DP_VS_COUNT; 155b8e80941Smrg } else if (so->type == MESA_SHADER_COMPUTE) { 156b8e80941Smrg num_driver_params = IR3_DP_CS_COUNT; 157b8e80941Smrg } 158b8e80941Smrg 159b8e80941Smrg so->constbase.driver_param = constoff; 160b8e80941Smrg constoff 
+= align(num_driver_params, 4) / 4; 161b8e80941Smrg 162b8e80941Smrg if ((so->type == MESA_SHADER_VERTEX) && 163b8e80941Smrg (compiler->gpu_id < 500) && 164b8e80941Smrg so->shader->stream_output.num_outputs > 0) { 165b8e80941Smrg so->constbase.tfbo = constoff; 166b8e80941Smrg constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4; 167b8e80941Smrg } 168b8e80941Smrg 169b8e80941Smrg so->constbase.immediate = constoff; 170b8e80941Smrg 171b8e80941Smrg return ctx; 172b8e80941Smrg} 173b8e80941Smrg 174b8e80941Smrgvoid 175b8e80941Smrgir3_context_free(struct ir3_context *ctx) 176b8e80941Smrg{ 177b8e80941Smrg ralloc_free(ctx); 178b8e80941Smrg} 179b8e80941Smrg 180b8e80941Smrg/* 181b8e80941Smrg * Misc helpers 182b8e80941Smrg */ 183b8e80941Smrg 184b8e80941Smrg/* allocate a n element value array (to be populated by caller) and 185b8e80941Smrg * insert in def_ht 186b8e80941Smrg */ 187b8e80941Smrgstruct ir3_instruction ** 188b8e80941Smrgir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n) 189b8e80941Smrg{ 190b8e80941Smrg struct ir3_instruction **value = 191b8e80941Smrg ralloc_array(ctx->def_ht, struct ir3_instruction *, n); 192b8e80941Smrg _mesa_hash_table_insert(ctx->def_ht, dst, value); 193b8e80941Smrg return value; 194b8e80941Smrg} 195b8e80941Smrg 196b8e80941Smrgstruct ir3_instruction ** 197b8e80941Smrgir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n) 198b8e80941Smrg{ 199b8e80941Smrg struct ir3_instruction **value; 200b8e80941Smrg 201b8e80941Smrg if (dst->is_ssa) { 202b8e80941Smrg value = ir3_get_dst_ssa(ctx, &dst->ssa, n); 203b8e80941Smrg } else { 204b8e80941Smrg value = ralloc_array(ctx, struct ir3_instruction *, n); 205b8e80941Smrg } 206b8e80941Smrg 207b8e80941Smrg /* NOTE: in non-ssa case, we don't really need to store last_dst 208b8e80941Smrg * but this helps us catch cases where put_dst() call is forgotten 209b8e80941Smrg */ 210b8e80941Smrg compile_assert(ctx, !ctx->last_dst); 211b8e80941Smrg ctx->last_dst = value; 212b8e80941Smrg 
ctx->last_dst_n = n; 213b8e80941Smrg 214b8e80941Smrg return value; 215b8e80941Smrg} 216b8e80941Smrg 217b8e80941Smrgstruct ir3_instruction * const * 218b8e80941Smrgir3_get_src(struct ir3_context *ctx, nir_src *src) 219b8e80941Smrg{ 220b8e80941Smrg if (src->is_ssa) { 221b8e80941Smrg struct hash_entry *entry; 222b8e80941Smrg entry = _mesa_hash_table_search(ctx->def_ht, src->ssa); 223b8e80941Smrg compile_assert(ctx, entry); 224b8e80941Smrg return entry->data; 225b8e80941Smrg } else { 226b8e80941Smrg nir_register *reg = src->reg.reg; 227b8e80941Smrg struct ir3_array *arr = ir3_get_array(ctx, reg); 228b8e80941Smrg unsigned num_components = arr->r->num_components; 229b8e80941Smrg struct ir3_instruction *addr = NULL; 230b8e80941Smrg struct ir3_instruction **value = 231b8e80941Smrg ralloc_array(ctx, struct ir3_instruction *, num_components); 232b8e80941Smrg 233b8e80941Smrg if (src->reg.indirect) 234b8e80941Smrg addr = ir3_get_addr(ctx, ir3_get_src(ctx, src->reg.indirect)[0], 235b8e80941Smrg reg->num_components); 236b8e80941Smrg 237b8e80941Smrg for (unsigned i = 0; i < num_components; i++) { 238b8e80941Smrg unsigned n = src->reg.base_offset * reg->num_components + i; 239b8e80941Smrg compile_assert(ctx, n < arr->length); 240b8e80941Smrg value[i] = ir3_create_array_load(ctx, arr, n, addr); 241b8e80941Smrg } 242b8e80941Smrg 243b8e80941Smrg return value; 244b8e80941Smrg } 245b8e80941Smrg} 246b8e80941Smrg 247b8e80941Smrgvoid 248b8e80941Smrgir3_put_dst(struct ir3_context *ctx, nir_dest *dst) 249b8e80941Smrg{ 250b8e80941Smrg unsigned bit_size = nir_dest_bit_size(*dst); 251b8e80941Smrg 252b8e80941Smrg /* add extra mov if dst value is HIGH reg.. 
in some cases not all 253b8e80941Smrg * instructions can read from HIGH regs, in cases where they can 254b8e80941Smrg * ir3_cp will clean up the extra mov: 255b8e80941Smrg */ 256b8e80941Smrg for (unsigned i = 0; i < ctx->last_dst_n; i++) { 257b8e80941Smrg if (!ctx->last_dst[i]) 258b8e80941Smrg continue; 259b8e80941Smrg if (ctx->last_dst[i]->regs[0]->flags & IR3_REG_HIGH) { 260b8e80941Smrg ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32); 261b8e80941Smrg } 262b8e80941Smrg } 263b8e80941Smrg 264b8e80941Smrg if (bit_size < 32) { 265b8e80941Smrg for (unsigned i = 0; i < ctx->last_dst_n; i++) { 266b8e80941Smrg struct ir3_instruction *dst = ctx->last_dst[i]; 267b8e80941Smrg dst->regs[0]->flags |= IR3_REG_HALF; 268b8e80941Smrg if (ctx->last_dst[i]->opc == OPC_META_FO) 269b8e80941Smrg dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF; 270b8e80941Smrg } 271b8e80941Smrg } 272b8e80941Smrg 273b8e80941Smrg if (!dst->is_ssa) { 274b8e80941Smrg nir_register *reg = dst->reg.reg; 275b8e80941Smrg struct ir3_array *arr = ir3_get_array(ctx, reg); 276b8e80941Smrg unsigned num_components = ctx->last_dst_n; 277b8e80941Smrg struct ir3_instruction *addr = NULL; 278b8e80941Smrg 279b8e80941Smrg if (dst->reg.indirect) 280b8e80941Smrg addr = ir3_get_addr(ctx, ir3_get_src(ctx, dst->reg.indirect)[0], 281b8e80941Smrg reg->num_components); 282b8e80941Smrg 283b8e80941Smrg for (unsigned i = 0; i < num_components; i++) { 284b8e80941Smrg unsigned n = dst->reg.base_offset * reg->num_components + i; 285b8e80941Smrg compile_assert(ctx, n < arr->length); 286b8e80941Smrg if (!ctx->last_dst[i]) 287b8e80941Smrg continue; 288b8e80941Smrg ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr); 289b8e80941Smrg } 290b8e80941Smrg 291b8e80941Smrg ralloc_free(ctx->last_dst); 292b8e80941Smrg } 293b8e80941Smrg 294b8e80941Smrg ctx->last_dst = NULL; 295b8e80941Smrg ctx->last_dst_n = 0; 296b8e80941Smrg} 297b8e80941Smrg 298b8e80941Smrgstruct ir3_instruction * 
299b8e80941Smrgir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr, 300b8e80941Smrg unsigned arrsz) 301b8e80941Smrg{ 302b8e80941Smrg struct ir3_block *block = ctx->block; 303b8e80941Smrg struct ir3_instruction *collect; 304b8e80941Smrg 305b8e80941Smrg if (arrsz == 0) 306b8e80941Smrg return NULL; 307b8e80941Smrg 308b8e80941Smrg unsigned flags = arr[0]->regs[0]->flags & IR3_REG_HALF; 309b8e80941Smrg 310b8e80941Smrg collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz); 311b8e80941Smrg ir3_reg_create(collect, 0, flags); /* dst */ 312b8e80941Smrg for (unsigned i = 0; i < arrsz; i++) { 313b8e80941Smrg struct ir3_instruction *elem = arr[i]; 314b8e80941Smrg 315b8e80941Smrg /* Since arrays are pre-colored in RA, we can't assume that 316b8e80941Smrg * things will end up in the right place. (Ie. if a collect 317b8e80941Smrg * joins elements from two different arrays.) So insert an 318b8e80941Smrg * extra mov. 319b8e80941Smrg * 320b8e80941Smrg * We could possibly skip this if all the collected elements 321b8e80941Smrg * are contiguous elements in a single array.. not sure how 322b8e80941Smrg * likely that is to happen. 323b8e80941Smrg * 324b8e80941Smrg * Fixes a problem with glamor shaders, that in effect do 325b8e80941Smrg * something like: 326b8e80941Smrg * 327b8e80941Smrg * if (foo) 328b8e80941Smrg * texcoord = .. 329b8e80941Smrg * else 330b8e80941Smrg * texcoord = .. 331b8e80941Smrg * color = texture2D(tex, texcoord); 332b8e80941Smrg * 333b8e80941Smrg * In this case, texcoord will end up as nir registers (which 334b8e80941Smrg * translate to ir3 array's of length 1. And we can't assume 335b8e80941Smrg * the two (or more) arrays will get allocated in consecutive 336b8e80941Smrg * scalar registers. 337b8e80941Smrg * 338b8e80941Smrg */ 339b8e80941Smrg if (elem->regs[0]->flags & IR3_REG_ARRAY) { 340b8e80941Smrg type_t type = (flags & IR3_REG_HALF) ? 
TYPE_U16 : TYPE_U32; 341b8e80941Smrg elem = ir3_MOV(block, elem, type); 342b8e80941Smrg } 343b8e80941Smrg 344b8e80941Smrg compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags); 345b8e80941Smrg ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem; 346b8e80941Smrg } 347b8e80941Smrg 348b8e80941Smrg collect->regs[0]->wrmask = MASK(arrsz); 349b8e80941Smrg 350b8e80941Smrg return collect; 351b8e80941Smrg} 352b8e80941Smrg 353b8e80941Smrg/* helper for instructions that produce multiple consecutive scalar 354b8e80941Smrg * outputs which need to have a split/fanout meta instruction inserted 355b8e80941Smrg */ 356b8e80941Smrgvoid 357b8e80941Smrgir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst, 358b8e80941Smrg struct ir3_instruction *src, unsigned base, unsigned n) 359b8e80941Smrg{ 360b8e80941Smrg struct ir3_instruction *prev = NULL; 361b8e80941Smrg 362b8e80941Smrg if ((n == 1) && (src->regs[0]->wrmask == 0x1)) { 363b8e80941Smrg dst[0] = src; 364b8e80941Smrg return; 365b8e80941Smrg } 366b8e80941Smrg 367b8e80941Smrg unsigned flags = src->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH); 368b8e80941Smrg 369b8e80941Smrg for (int i = 0, j = 0; i < n; i++) { 370b8e80941Smrg struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO); 371b8e80941Smrg ir3_reg_create(split, 0, IR3_REG_SSA | flags); 372b8e80941Smrg ir3_reg_create(split, 0, IR3_REG_SSA | flags)->instr = src; 373b8e80941Smrg split->fo.off = i + base; 374b8e80941Smrg 375b8e80941Smrg if (prev) { 376b8e80941Smrg split->cp.left = prev; 377b8e80941Smrg split->cp.left_cnt++; 378b8e80941Smrg prev->cp.right = split; 379b8e80941Smrg prev->cp.right_cnt++; 380b8e80941Smrg } 381b8e80941Smrg prev = split; 382b8e80941Smrg 383b8e80941Smrg if (src->regs[0]->wrmask & (1 << (i + base))) 384b8e80941Smrg dst[j++] = split; 385b8e80941Smrg } 386b8e80941Smrg} 387b8e80941Smrg 388b8e80941SmrgNORETURN void 389b8e80941Smrgir3_context_error(struct ir3_context *ctx, const char *format, ...) 
390b8e80941Smrg{ 391b8e80941Smrg struct hash_table *errors = NULL; 392b8e80941Smrg va_list ap; 393b8e80941Smrg va_start(ap, format); 394b8e80941Smrg if (ctx->cur_instr) { 395b8e80941Smrg errors = _mesa_hash_table_create(NULL, 396b8e80941Smrg _mesa_hash_pointer, 397b8e80941Smrg _mesa_key_pointer_equal); 398b8e80941Smrg char *msg = ralloc_vasprintf(errors, format, ap); 399b8e80941Smrg _mesa_hash_table_insert(errors, ctx->cur_instr, msg); 400b8e80941Smrg } else { 401b8e80941Smrg _debug_vprintf(format, ap); 402b8e80941Smrg } 403b8e80941Smrg va_end(ap); 404b8e80941Smrg nir_print_shader_annotated(ctx->s, stdout, errors); 405b8e80941Smrg ralloc_free(errors); 406b8e80941Smrg ctx->error = true; 407b8e80941Smrg unreachable(""); 408b8e80941Smrg} 409b8e80941Smrg 410b8e80941Smrgstatic struct ir3_instruction * 411b8e80941Smrgcreate_addr(struct ir3_block *block, struct ir3_instruction *src, int align) 412b8e80941Smrg{ 413b8e80941Smrg struct ir3_instruction *instr, *immed; 414b8e80941Smrg 415b8e80941Smrg /* TODO in at least some cases, the backend could probably be 416b8e80941Smrg * made clever enough to propagate IR3_REG_HALF.. 
417b8e80941Smrg */ 418b8e80941Smrg instr = ir3_COV(block, src, TYPE_U32, TYPE_S16); 419b8e80941Smrg instr->regs[0]->flags |= IR3_REG_HALF; 420b8e80941Smrg 421b8e80941Smrg switch(align){ 422b8e80941Smrg case 1: 423b8e80941Smrg /* src *= 1: */ 424b8e80941Smrg break; 425b8e80941Smrg case 2: 426b8e80941Smrg /* src *= 2 => src <<= 1: */ 427b8e80941Smrg immed = create_immed(block, 1); 428b8e80941Smrg immed->regs[0]->flags |= IR3_REG_HALF; 429b8e80941Smrg 430b8e80941Smrg instr = ir3_SHL_B(block, instr, 0, immed, 0); 431b8e80941Smrg instr->regs[0]->flags |= IR3_REG_HALF; 432b8e80941Smrg instr->regs[1]->flags |= IR3_REG_HALF; 433b8e80941Smrg break; 434b8e80941Smrg case 3: 435b8e80941Smrg /* src *= 3: */ 436b8e80941Smrg immed = create_immed(block, 3); 437b8e80941Smrg immed->regs[0]->flags |= IR3_REG_HALF; 438b8e80941Smrg 439b8e80941Smrg instr = ir3_MULL_U(block, instr, 0, immed, 0); 440b8e80941Smrg instr->regs[0]->flags |= IR3_REG_HALF; 441b8e80941Smrg instr->regs[1]->flags |= IR3_REG_HALF; 442b8e80941Smrg break; 443b8e80941Smrg case 4: 444b8e80941Smrg /* src *= 4 => src <<= 2: */ 445b8e80941Smrg immed = create_immed(block, 2); 446b8e80941Smrg immed->regs[0]->flags |= IR3_REG_HALF; 447b8e80941Smrg 448b8e80941Smrg instr = ir3_SHL_B(block, instr, 0, immed, 0); 449b8e80941Smrg instr->regs[0]->flags |= IR3_REG_HALF; 450b8e80941Smrg instr->regs[1]->flags |= IR3_REG_HALF; 451b8e80941Smrg break; 452b8e80941Smrg default: 453b8e80941Smrg unreachable("bad align"); 454b8e80941Smrg return NULL; 455b8e80941Smrg } 456b8e80941Smrg 457b8e80941Smrg instr = ir3_MOV(block, instr, TYPE_S16); 458b8e80941Smrg instr->regs[0]->num = regid(REG_A0, 0); 459b8e80941Smrg instr->regs[0]->flags |= IR3_REG_HALF; 460b8e80941Smrg instr->regs[1]->flags |= IR3_REG_HALF; 461b8e80941Smrg 462b8e80941Smrg return instr; 463b8e80941Smrg} 464b8e80941Smrg 465b8e80941Smrg/* caches addr values to avoid generating multiple cov/shl/mova 466b8e80941Smrg * sequences for each use of a given NIR level src as address 
467b8e80941Smrg */ 468b8e80941Smrgstruct ir3_instruction * 469b8e80941Smrgir3_get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align) 470b8e80941Smrg{ 471b8e80941Smrg struct ir3_instruction *addr; 472b8e80941Smrg unsigned idx = align - 1; 473b8e80941Smrg 474b8e80941Smrg compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht)); 475b8e80941Smrg 476b8e80941Smrg if (!ctx->addr_ht[idx]) { 477b8e80941Smrg ctx->addr_ht[idx] = _mesa_hash_table_create(ctx, 478b8e80941Smrg _mesa_hash_pointer, _mesa_key_pointer_equal); 479b8e80941Smrg } else { 480b8e80941Smrg struct hash_entry *entry; 481b8e80941Smrg entry = _mesa_hash_table_search(ctx->addr_ht[idx], src); 482b8e80941Smrg if (entry) 483b8e80941Smrg return entry->data; 484b8e80941Smrg } 485b8e80941Smrg 486b8e80941Smrg addr = create_addr(ctx->block, src, align); 487b8e80941Smrg _mesa_hash_table_insert(ctx->addr_ht[idx], src, addr); 488b8e80941Smrg 489b8e80941Smrg return addr; 490b8e80941Smrg} 491b8e80941Smrg 492b8e80941Smrgstruct ir3_instruction * 493b8e80941Smrgir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src) 494b8e80941Smrg{ 495b8e80941Smrg struct ir3_block *b = ctx->block; 496b8e80941Smrg struct ir3_instruction *cond; 497b8e80941Smrg 498b8e80941Smrg /* NOTE: only cmps.*.* can write p0.x: */ 499b8e80941Smrg cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0); 500b8e80941Smrg cond->cat2.condition = IR3_COND_NE; 501b8e80941Smrg 502b8e80941Smrg /* condition always goes in predicate register: */ 503b8e80941Smrg cond->regs[0]->num = regid(REG_P0, 0); 504b8e80941Smrg 505b8e80941Smrg return cond; 506b8e80941Smrg} 507b8e80941Smrg 508b8e80941Smrg/* 509b8e80941Smrg * Array helpers 510b8e80941Smrg */ 511b8e80941Smrg 512b8e80941Smrgvoid 513b8e80941Smrgir3_declare_array(struct ir3_context *ctx, nir_register *reg) 514b8e80941Smrg{ 515b8e80941Smrg struct ir3_array *arr = rzalloc(ctx, struct ir3_array); 516b8e80941Smrg arr->id = ++ctx->num_arrays; 517b8e80941Smrg /* NOTE: sometimes we get non array regs, 
for example for arrays of 518b8e80941Smrg * length 1. See fs-const-array-of-struct-of-array.shader_test. So 519b8e80941Smrg * treat a non-array as if it was an array of length 1. 520b8e80941Smrg * 521b8e80941Smrg * It would be nice if there was a nir pass to convert arrays of 522b8e80941Smrg * length 1 to ssa. 523b8e80941Smrg */ 524b8e80941Smrg arr->length = reg->num_components * MAX2(1, reg->num_array_elems); 525b8e80941Smrg compile_assert(ctx, arr->length > 0); 526b8e80941Smrg arr->r = reg; 527b8e80941Smrg list_addtail(&arr->node, &ctx->ir->array_list); 528b8e80941Smrg} 529b8e80941Smrg 530b8e80941Smrgstruct ir3_array * 531b8e80941Smrgir3_get_array(struct ir3_context *ctx, nir_register *reg) 532b8e80941Smrg{ 533b8e80941Smrg list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { 534b8e80941Smrg if (arr->r == reg) 535b8e80941Smrg return arr; 536b8e80941Smrg } 537b8e80941Smrg ir3_context_error(ctx, "bogus reg: %s\n", reg->name); 538b8e80941Smrg return NULL; 539b8e80941Smrg} 540b8e80941Smrg 541b8e80941Smrg/* relative (indirect) if address!=NULL */ 542b8e80941Smrgstruct ir3_instruction * 543b8e80941Smrgir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n, 544b8e80941Smrg struct ir3_instruction *address) 545b8e80941Smrg{ 546b8e80941Smrg struct ir3_block *block = ctx->block; 547b8e80941Smrg struct ir3_instruction *mov; 548b8e80941Smrg struct ir3_register *src; 549b8e80941Smrg 550b8e80941Smrg mov = ir3_instr_create(block, OPC_MOV); 551b8e80941Smrg mov->cat1.src_type = TYPE_U32; 552b8e80941Smrg mov->cat1.dst_type = TYPE_U32; 553b8e80941Smrg mov->barrier_class = IR3_BARRIER_ARRAY_R; 554b8e80941Smrg mov->barrier_conflict = IR3_BARRIER_ARRAY_W; 555b8e80941Smrg ir3_reg_create(mov, 0, 0); 556b8e80941Smrg src = ir3_reg_create(mov, 0, IR3_REG_ARRAY | 557b8e80941Smrg COND(address, IR3_REG_RELATIV)); 558b8e80941Smrg src->instr = arr->last_write; 559b8e80941Smrg src->size = arr->length; 560b8e80941Smrg src->array.id = arr->id; 
561b8e80941Smrg src->array.offset = n; 562b8e80941Smrg 563b8e80941Smrg if (address) 564b8e80941Smrg ir3_instr_set_address(mov, address); 565b8e80941Smrg 566b8e80941Smrg return mov; 567b8e80941Smrg} 568b8e80941Smrg 569b8e80941Smrg/* relative (indirect) if address!=NULL */ 570b8e80941Smrgvoid 571b8e80941Smrgir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n, 572b8e80941Smrg struct ir3_instruction *src, struct ir3_instruction *address) 573b8e80941Smrg{ 574b8e80941Smrg struct ir3_block *block = ctx->block; 575b8e80941Smrg struct ir3_instruction *mov; 576b8e80941Smrg struct ir3_register *dst; 577b8e80941Smrg 578b8e80941Smrg /* if not relative store, don't create an extra mov, since that 579b8e80941Smrg * ends up being difficult for cp to remove. 580b8e80941Smrg */ 581b8e80941Smrg if (!address) { 582b8e80941Smrg dst = src->regs[0]; 583b8e80941Smrg 584b8e80941Smrg src->barrier_class |= IR3_BARRIER_ARRAY_W; 585b8e80941Smrg src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; 586b8e80941Smrg 587b8e80941Smrg dst->flags |= IR3_REG_ARRAY; 588b8e80941Smrg dst->instr = arr->last_write; 589b8e80941Smrg dst->size = arr->length; 590b8e80941Smrg dst->array.id = arr->id; 591b8e80941Smrg dst->array.offset = n; 592b8e80941Smrg 593b8e80941Smrg arr->last_write = src; 594b8e80941Smrg 595b8e80941Smrg array_insert(block, block->keeps, src); 596b8e80941Smrg 597b8e80941Smrg return; 598b8e80941Smrg } 599b8e80941Smrg 600b8e80941Smrg mov = ir3_instr_create(block, OPC_MOV); 601b8e80941Smrg mov->cat1.src_type = TYPE_U32; 602b8e80941Smrg mov->cat1.dst_type = TYPE_U32; 603b8e80941Smrg mov->barrier_class = IR3_BARRIER_ARRAY_W; 604b8e80941Smrg mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; 605b8e80941Smrg dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY | 606b8e80941Smrg COND(address, IR3_REG_RELATIV)); 607b8e80941Smrg dst->instr = arr->last_write; 608b8e80941Smrg dst->size = arr->length; 609b8e80941Smrg dst->array.id = arr->id; 610b8e80941Smrg 
dst->array.offset = n; 611b8e80941Smrg ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src; 612b8e80941Smrg 613b8e80941Smrg if (address) 614b8e80941Smrg ir3_instr_set_address(mov, address); 615b8e80941Smrg 616b8e80941Smrg arr->last_write = mov; 617b8e80941Smrg 618b8e80941Smrg /* the array store may only matter to something in an earlier 619b8e80941Smrg * block (ie. loops), but since arrays are not in SSA, depth 620b8e80941Smrg * pass won't know this.. so keep all array stores: 621b8e80941Smrg */ 622b8e80941Smrg array_insert(block, block->keeps, mov); 623b8e80941Smrg} 624