17e102996Smaya/* 27e102996Smaya * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org> 37e102996Smaya * 47e102996Smaya * Permission is hereby granted, free of charge, to any person obtaining a 57e102996Smaya * copy of this software and associated documentation files (the "Software"), 67e102996Smaya * to deal in the Software without restriction, including without limitation 77e102996Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87e102996Smaya * and/or sell copies of the Software, and to permit persons to whom the 97e102996Smaya * Software is furnished to do so, subject to the following conditions: 107e102996Smaya * 117e102996Smaya * The above copyright notice and this permission notice (including the next 127e102996Smaya * paragraph) shall be included in all copies or substantial portions of the 137e102996Smaya * Software. 147e102996Smaya * 157e102996Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167e102996Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177e102996Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187e102996Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197e102996Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207e102996Smaya * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217e102996Smaya * SOFTWARE. 227e102996Smaya * 237e102996Smaya * Authors: 247e102996Smaya * Rob Clark <robclark@freedesktop.org> 257e102996Smaya */ 267e102996Smaya 277e102996Smaya#ifndef IR3_CONTEXT_H_ 287e102996Smaya#define IR3_CONTEXT_H_ 297e102996Smaya 307ec681f3Smrg#include "ir3.h" 317e102996Smaya#include "ir3_compiler.h" 327e102996Smaya#include "ir3_nir.h" 337e102996Smaya 347e102996Smaya/* for conditionally setting boolean flag(s): */ 357e102996Smaya#define COND(bool, val) ((bool) ? (val) : 0) 367e102996Smaya 377ec681f3Smrg#define DBG(fmt, ...) \ 387ec681f3Smrg do { \ 397ec681f3Smrg mesa_logd("%s:%d: " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 407ec681f3Smrg } while (0) 417e102996Smaya 427e102996Smaya/** 437e102996Smaya * The context for compilation of a single shader. 447e102996Smaya */ 457e102996Smayastruct ir3_context { 467ec681f3Smrg struct ir3_compiler *compiler; 477ec681f3Smrg const struct ir3_context_funcs *funcs; 487ec681f3Smrg 497ec681f3Smrg struct nir_shader *s; 507ec681f3Smrg 517ec681f3Smrg struct nir_instr *cur_instr; /* current instruction, just for debug */ 527ec681f3Smrg 537ec681f3Smrg struct ir3 *ir; 547ec681f3Smrg struct ir3_shader_variant *so; 557ec681f3Smrg 567ec681f3Smrg /* Tables of scalar inputs/outputs. Because of the way varying packing 577ec681f3Smrg * works, we could have inputs w/ fractional location, which is a bit 587ec681f3Smrg * awkward to deal with unless we keep track of the split scalar in/ 597ec681f3Smrg * out components. 607ec681f3Smrg * 617ec681f3Smrg * These *only* have inputs/outputs that are touched by load_*input and 627ec681f3Smrg * store_output. 637ec681f3Smrg */ 647ec681f3Smrg unsigned ninputs, noutputs; 657ec681f3Smrg struct ir3_instruction **inputs; 667ec681f3Smrg struct ir3_instruction **outputs; 677ec681f3Smrg 687ec681f3Smrg struct ir3_block *block; /* the current block */ 697ec681f3Smrg struct ir3_block *in_block; /* block created for shader inputs */ 707ec681f3Smrg 717ec681f3Smrg nir_function_impl *impl; 727ec681f3Smrg 737ec681f3Smrg /* For fragment shaders, varyings are not actual shader inputs, 747ec681f3Smrg * instead the hw passes a ij coord which is used with 757ec681f3Smrg * bary.f. 767ec681f3Smrg * 777ec681f3Smrg * But NIR doesn't know that, it still declares varyings as 787ec681f3Smrg * inputs. So we do all the input tracking normally and fix 797ec681f3Smrg * things up after compile_instructions() 807ec681f3Smrg */ 817ec681f3Smrg struct ir3_instruction *ij[IJ_COUNT]; 827ec681f3Smrg 837ec681f3Smrg /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */ 847ec681f3Smrg struct ir3_instruction *frag_face, *frag_coord; 857ec681f3Smrg 867ec681f3Smrg /* For vertex shaders, keep track of the system values sources */ 877ec681f3Smrg struct ir3_instruction *vertex_id, *basevertex, *instance_id, *base_instance, 887ec681f3Smrg *draw_id, *view_index; 897ec681f3Smrg 907ec681f3Smrg /* For fragment shaders: */ 917ec681f3Smrg struct ir3_instruction *samp_id, *samp_mask_in; 927ec681f3Smrg 937ec681f3Smrg /* For geometry shaders: */ 947ec681f3Smrg struct ir3_instruction *primitive_id; 957ec681f3Smrg struct ir3_instruction *gs_header; 967ec681f3Smrg 977ec681f3Smrg /* For tessellation shaders: */ 987ec681f3Smrg struct ir3_instruction *patch_vertices_in; 997ec681f3Smrg struct ir3_instruction *tcs_header; 1007ec681f3Smrg struct ir3_instruction *tess_coord; 1017ec681f3Smrg struct ir3_instruction *rel_patch_id; 1027ec681f3Smrg 1037ec681f3Smrg /* Compute shader inputs: */ 1047ec681f3Smrg struct ir3_instruction *local_invocation_id, *work_group_id; 1057ec681f3Smrg 1067ec681f3Smrg /* mapping from nir_register to defining instruction: */ 1077ec681f3Smrg struct hash_table *def_ht; 1087ec681f3Smrg 1097ec681f3Smrg unsigned num_arrays; 1107ec681f3Smrg 1117ec681f3Smrg /* Tracking for max level of flowcontrol (branchstack) needed 1127ec681f3Smrg * by a5xx+: 1137ec681f3Smrg */ 1147ec681f3Smrg unsigned stack, max_stack; 1157ec681f3Smrg 1167ec681f3Smrg unsigned loop_id; 1177ec681f3Smrg unsigned loop_depth; 1187ec681f3Smrg 1197ec681f3Smrg /* a common pattern for indirect addressing is to request the 1207ec681f3Smrg * same address register multiple times. To avoid generating 1217ec681f3Smrg * duplicate instruction sequences (which our backend does not 1227ec681f3Smrg * try to clean up, since that should be done as the NIR stage) 1237ec681f3Smrg * we cache the address value generated for a given src value: 1247ec681f3Smrg * 1257ec681f3Smrg * Note that we have to cache these per alignment, since same 1267ec681f3Smrg * src used for an array of vec1 cannot be also used for an 1277ec681f3Smrg * array of vec4. 1287ec681f3Smrg */ 1297ec681f3Smrg struct hash_table *addr0_ht[4]; 1307ec681f3Smrg 1317ec681f3Smrg /* The same for a1.x. We only support immediate values for a1.x, as this 1327ec681f3Smrg * is the only use so far. 1337ec681f3Smrg */ 1347ec681f3Smrg struct hash_table_u64 *addr1_ht; 1357ec681f3Smrg 1367ec681f3Smrg struct hash_table *sel_cond_conversions; 1377ec681f3Smrg 1387ec681f3Smrg /* last dst array, for indirect we need to insert a var-store. 1397ec681f3Smrg */ 1407ec681f3Smrg struct ir3_instruction **last_dst; 1417ec681f3Smrg unsigned last_dst_n; 1427ec681f3Smrg 1437ec681f3Smrg /* maps nir_block to ir3_block, mostly for the purposes of 1447ec681f3Smrg * figuring out the blocks successors 1457ec681f3Smrg */ 1467ec681f3Smrg struct hash_table *block_ht; 1477ec681f3Smrg 1487ec681f3Smrg /* maps nir_block at the top of a loop to ir3_block collecting continue 1497ec681f3Smrg * edges. 1507ec681f3Smrg */ 1517ec681f3Smrg struct hash_table *continue_block_ht; 1527ec681f3Smrg 1537ec681f3Smrg /* on a4xx, bitmask of samplers which need astc+srgb workaround: */ 1547ec681f3Smrg unsigned astc_srgb; 1557ec681f3Smrg 1567ec681f3Smrg unsigned samples; /* bitmask of x,y sample shifts */ 1577ec681f3Smrg 1587ec681f3Smrg unsigned max_texture_index; 1597ec681f3Smrg 1607ec681f3Smrg unsigned prefetch_limit; 1617ec681f3Smrg 1627ec681f3Smrg /* set if we encounter something we can't handle yet, so we 1637ec681f3Smrg * can bail cleanly and fallback to TGSI compiler f/e 1647ec681f3Smrg */ 1657ec681f3Smrg bool error; 1667e102996Smaya}; 1677e102996Smaya 1687e102996Smayastruct ir3_context_funcs { 1697ec681f3Smrg void (*emit_intrinsic_load_ssbo)(struct ir3_context *ctx, 1707ec681f3Smrg nir_intrinsic_instr *intr, 1717ec681f3Smrg struct ir3_instruction **dst); 1727ec681f3Smrg void (*emit_intrinsic_store_ssbo)(struct ir3_context *ctx, 1737ec681f3Smrg nir_intrinsic_instr *intr); 1747ec681f3Smrg struct ir3_instruction *(*emit_intrinsic_atomic_ssbo)( 1757ec681f3Smrg struct ir3_context *ctx, nir_intrinsic_instr *intr); 1767ec681f3Smrg void (*emit_intrinsic_load_image)(struct ir3_context *ctx, 1777ec681f3Smrg nir_intrinsic_instr *intr, 1787ec681f3Smrg struct ir3_instruction **dst); 1797ec681f3Smrg void (*emit_intrinsic_store_image)(struct ir3_context *ctx, 1807ec681f3Smrg nir_intrinsic_instr *intr); 1817ec681f3Smrg struct ir3_instruction *(*emit_intrinsic_atomic_image)( 1827ec681f3Smrg struct ir3_context *ctx, nir_intrinsic_instr *intr); 1837ec681f3Smrg void (*emit_intrinsic_image_size)(struct ir3_context *ctx, 1847ec681f3Smrg nir_intrinsic_instr *intr, 1857ec681f3Smrg struct ir3_instruction **dst); 1867ec681f3Smrg void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx, 1877ec681f3Smrg nir_intrinsic_instr *intr, 1887ec681f3Smrg struct ir3_instruction **dst); 1897ec681f3Smrg void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx, 1907ec681f3Smrg nir_intrinsic_instr *intr); 1917e102996Smaya}; 1927e102996Smaya 1937e102996Smayaextern const struct ir3_context_funcs ir3_a4xx_funcs; 1947e102996Smayaextern const struct ir3_context_funcs ir3_a6xx_funcs; 1957e102996Smaya 1967ec681f3Smrgstruct ir3_context *ir3_context_init(struct ir3_compiler *compiler, 1977ec681f3Smrg struct ir3_shader_variant *so); 1987e102996Smayavoid ir3_context_free(struct ir3_context *ctx); 1997e102996Smaya 2007ec681f3Smrgstruct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx, 2017ec681f3Smrg nir_ssa_def *dst, unsigned n); 2027ec681f3Smrgstruct ir3_instruction **ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, 2037ec681f3Smrg unsigned n); 2047ec681f3Smrgstruct ir3_instruction *const *ir3_get_src(struct ir3_context *ctx, 2057ec681f3Smrg nir_src *src); 2067e102996Smayavoid ir3_put_dst(struct ir3_context *ctx, nir_dest *dst); 2077ec681f3Smrgstruct ir3_instruction *ir3_create_collect(struct ir3_block *block, 2087ec681f3Smrg struct ir3_instruction *const *arr, 2097ec681f3Smrg unsigned arrsz); 2107e102996Smayavoid ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst, 2117ec681f3Smrg struct ir3_instruction *src, unsigned base, unsigned n); 2127ec681f3Smrgvoid ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc); 2137ec681f3Smrgvoid ir3_handle_nonuniform(struct ir3_instruction *instr, 2147ec681f3Smrg nir_intrinsic_instr *intrin); 2157ec681f3Smrgvoid emit_intrinsic_image_size_tex(struct ir3_context *ctx, 2167ec681f3Smrg nir_intrinsic_instr *intr, 2177ec681f3Smrg struct ir3_instruction **dst); 2187ec681f3Smrg 2197ec681f3Smrg#define ir3_collect(block, ...) \ 2207ec681f3Smrg ({ \ 2217ec681f3Smrg struct ir3_instruction *__arr[] = {__VA_ARGS__}; \ 2227ec681f3Smrg ir3_create_collect(block, __arr, ARRAY_SIZE(__arr)); \ 2237ec681f3Smrg }) 2247ec681f3Smrg 2257ec681f3SmrgNORETURN void ir3_context_error(struct ir3_context *ctx, const char *format, 2267ec681f3Smrg ...); 2277ec681f3Smrg 2287ec681f3Smrg#define compile_assert(ctx, cond) \ 2297ec681f3Smrg do { \ 2307ec681f3Smrg if (!(cond)) \ 2317ec681f3Smrg ir3_context_error((ctx), "failed assert: " #cond "\n"); \ 2327ec681f3Smrg } while (0) 2337ec681f3Smrg 2347ec681f3Smrgstruct ir3_instruction *ir3_get_addr0(struct ir3_context *ctx, 2357ec681f3Smrg struct ir3_instruction *src, int align); 2367ec681f3Smrgstruct ir3_instruction *ir3_get_addr1(struct ir3_context *ctx, 2377ec681f3Smrg unsigned const_val); 2387ec681f3Smrgstruct ir3_instruction *ir3_get_predicate(struct ir3_context *ctx, 2397ec681f3Smrg struct ir3_instruction *src); 2407e102996Smaya 2417e102996Smayavoid ir3_declare_array(struct ir3_context *ctx, nir_register *reg); 2427ec681f3Smrgstruct ir3_array *ir3_get_array(struct ir3_context *ctx, nir_register *reg); 2437e102996Smayastruct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx, 2447ec681f3Smrg struct ir3_array *arr, int n, 2457ec681f3Smrg struct ir3_instruction *address); 2467ec681f3Smrgvoid ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, 2477ec681f3Smrg int n, struct ir3_instruction *src, 2487ec681f3Smrg struct ir3_instruction *address); 2497ec681f3Smrg 2507ec681f3Smrgstatic inline type_t 2517ec681f3Smrgutype_for_size(unsigned bit_size) 2527e102996Smaya{ 2537ec681f3Smrg switch (bit_size) { 2547ec681f3Smrg case 32: 2557ec681f3Smrg return TYPE_U32; 2567ec681f3Smrg case 16: 2577ec681f3Smrg return TYPE_U16; 2587ec681f3Smrg case 8: 2597ec681f3Smrg return TYPE_U8; 2607ec681f3Smrg default: 2617ec681f3Smrg unreachable("bad bitsize"); 2627ec681f3Smrg return ~0; 2637ec681f3Smrg } 2647e102996Smaya} 2657e102996Smaya 2667ec681f3Smrgstatic inline type_t 2677ec681f3Smrgutype_src(nir_src src) 2687ec681f3Smrg{ 2697ec681f3Smrg return utype_for_size(nir_src_bit_size(src)); 2707ec681f3Smrg} 2717e102996Smaya 2727ec681f3Smrgstatic inline type_t 2737ec681f3Smrgutype_dst(nir_dest dst) 2747ec681f3Smrg{ 2757ec681f3Smrg return utype_for_size(nir_dest_bit_size(dst)); 2767ec681f3Smrg} 2777e102996Smaya 2787e102996Smaya#endif /* IR3_CONTEXT_H_ */ 279