/*
 * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
267e102996Smaya
277e102996Smaya#ifndef IR3_CONTEXT_H_
287e102996Smaya#define IR3_CONTEXT_H_
297e102996Smaya
307ec681f3Smrg#include "ir3.h"
317e102996Smaya#include "ir3_compiler.h"
327e102996Smaya#include "ir3_nir.h"
337e102996Smaya
/* Evaluates to `val` when `cond` is true and to 0 otherwise; meant for
 * conditionally setting flag bit(s) inside a larger expression.
 *
 * The first parameter is deliberately not called `bool`, so that it does not
 * shadow the <stdbool.h> type name inside the macro definition.
 */
#define COND(cond, val) ((cond) ? (val) : 0)
367e102996Smaya
/* Debug logging helper: forwards a printf-style message to mesa_logd(),
 * prefixed with the name of the calling function and the source line number.
 * The do/while(0) wrapper makes the expansion behave as a single statement.
 */
#define DBG(fmt, ...)                                                          \
   do {                                                                        \
      mesa_logd("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);             \
   } while (0)
417e102996Smaya
427e102996Smaya/**
437e102996Smaya * The context for compilation of a single shader.
447e102996Smaya */
457e102996Smayastruct ir3_context {
467ec681f3Smrg   struct ir3_compiler *compiler;
477ec681f3Smrg   const struct ir3_context_funcs *funcs;
487ec681f3Smrg
497ec681f3Smrg   struct nir_shader *s;
507ec681f3Smrg
517ec681f3Smrg   struct nir_instr *cur_instr; /* current instruction, just for debug */
527ec681f3Smrg
537ec681f3Smrg   struct ir3 *ir;
547ec681f3Smrg   struct ir3_shader_variant *so;
557ec681f3Smrg
567ec681f3Smrg   /* Tables of scalar inputs/outputs.  Because of the way varying packing
577ec681f3Smrg    * works, we could have inputs w/ fractional location, which is a bit
587ec681f3Smrg    * awkward to deal with unless we keep track of the split scalar in/
597ec681f3Smrg    * out components.
607ec681f3Smrg    *
617ec681f3Smrg    * These *only* have inputs/outputs that are touched by load_*input and
627ec681f3Smrg    * store_output.
637ec681f3Smrg    */
647ec681f3Smrg   unsigned ninputs, noutputs;
657ec681f3Smrg   struct ir3_instruction **inputs;
667ec681f3Smrg   struct ir3_instruction **outputs;
677ec681f3Smrg
687ec681f3Smrg   struct ir3_block *block;    /* the current block */
697ec681f3Smrg   struct ir3_block *in_block; /* block created for shader inputs */
707ec681f3Smrg
717ec681f3Smrg   nir_function_impl *impl;
727ec681f3Smrg
737ec681f3Smrg   /* For fragment shaders, varyings are not actual shader inputs,
747ec681f3Smrg    * instead the hw passes a ij coord which is used with
757ec681f3Smrg    * bary.f.
767ec681f3Smrg    *
777ec681f3Smrg    * But NIR doesn't know that, it still declares varyings as
787ec681f3Smrg    * inputs.  So we do all the input tracking normally and fix
797ec681f3Smrg    * things up after compile_instructions()
807ec681f3Smrg    */
817ec681f3Smrg   struct ir3_instruction *ij[IJ_COUNT];
827ec681f3Smrg
837ec681f3Smrg   /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
847ec681f3Smrg   struct ir3_instruction *frag_face, *frag_coord;
857ec681f3Smrg
867ec681f3Smrg   /* For vertex shaders, keep track of the system values sources */
877ec681f3Smrg   struct ir3_instruction *vertex_id, *basevertex, *instance_id, *base_instance,
887ec681f3Smrg      *draw_id, *view_index;
897ec681f3Smrg
907ec681f3Smrg   /* For fragment shaders: */
917ec681f3Smrg   struct ir3_instruction *samp_id, *samp_mask_in;
927ec681f3Smrg
937ec681f3Smrg   /* For geometry shaders: */
947ec681f3Smrg   struct ir3_instruction *primitive_id;
957ec681f3Smrg   struct ir3_instruction *gs_header;
967ec681f3Smrg
977ec681f3Smrg   /* For tessellation shaders: */
987ec681f3Smrg   struct ir3_instruction *patch_vertices_in;
997ec681f3Smrg   struct ir3_instruction *tcs_header;
1007ec681f3Smrg   struct ir3_instruction *tess_coord;
1017ec681f3Smrg   struct ir3_instruction *rel_patch_id;
1027ec681f3Smrg
1037ec681f3Smrg   /* Compute shader inputs: */
1047ec681f3Smrg   struct ir3_instruction *local_invocation_id, *work_group_id;
1057ec681f3Smrg
1067ec681f3Smrg   /* mapping from nir_register to defining instruction: */
1077ec681f3Smrg   struct hash_table *def_ht;
1087ec681f3Smrg
1097ec681f3Smrg   unsigned num_arrays;
1107ec681f3Smrg
1117ec681f3Smrg   /* Tracking for max level of flowcontrol (branchstack) needed
1127ec681f3Smrg    * by a5xx+:
1137ec681f3Smrg    */
1147ec681f3Smrg   unsigned stack, max_stack;
1157ec681f3Smrg
1167ec681f3Smrg   unsigned loop_id;
1177ec681f3Smrg   unsigned loop_depth;
1187ec681f3Smrg
1197ec681f3Smrg   /* a common pattern for indirect addressing is to request the
1207ec681f3Smrg    * same address register multiple times.  To avoid generating
1217ec681f3Smrg    * duplicate instruction sequences (which our backend does not
1227ec681f3Smrg    * try to clean up, since that should be done as the NIR stage)
1237ec681f3Smrg    * we cache the address value generated for a given src value:
1247ec681f3Smrg    *
1257ec681f3Smrg    * Note that we have to cache these per alignment, since same
1267ec681f3Smrg    * src used for an array of vec1 cannot be also used for an
1277ec681f3Smrg    * array of vec4.
1287ec681f3Smrg    */
1297ec681f3Smrg   struct hash_table *addr0_ht[4];
1307ec681f3Smrg
1317ec681f3Smrg   /* The same for a1.x. We only support immediate values for a1.x, as this
1327ec681f3Smrg    * is the only use so far.
1337ec681f3Smrg    */
1347ec681f3Smrg   struct hash_table_u64 *addr1_ht;
1357ec681f3Smrg
1367ec681f3Smrg   struct hash_table *sel_cond_conversions;
1377ec681f3Smrg
1387ec681f3Smrg   /* last dst array, for indirect we need to insert a var-store.
1397ec681f3Smrg    */
1407ec681f3Smrg   struct ir3_instruction **last_dst;
1417ec681f3Smrg   unsigned last_dst_n;
1427ec681f3Smrg
1437ec681f3Smrg   /* maps nir_block to ir3_block, mostly for the purposes of
1447ec681f3Smrg    * figuring out the blocks successors
1457ec681f3Smrg    */
1467ec681f3Smrg   struct hash_table *block_ht;
1477ec681f3Smrg
1487ec681f3Smrg   /* maps nir_block at the top of a loop to ir3_block collecting continue
1497ec681f3Smrg    * edges.
1507ec681f3Smrg    */
1517ec681f3Smrg   struct hash_table *continue_block_ht;
1527ec681f3Smrg
1537ec681f3Smrg   /* on a4xx, bitmask of samplers which need astc+srgb workaround: */
1547ec681f3Smrg   unsigned astc_srgb;
1557ec681f3Smrg
1567ec681f3Smrg   unsigned samples; /* bitmask of x,y sample shifts */
1577ec681f3Smrg
1587ec681f3Smrg   unsigned max_texture_index;
1597ec681f3Smrg
1607ec681f3Smrg   unsigned prefetch_limit;
1617ec681f3Smrg
1627ec681f3Smrg   /* set if we encounter something we can't handle yet, so we
1637ec681f3Smrg    * can bail cleanly and fallback to TGSI compiler f/e
1647ec681f3Smrg    */
1657ec681f3Smrg   bool error;
1667e102996Smaya};
1677e102996Smaya
1687e102996Smayastruct ir3_context_funcs {
1697ec681f3Smrg   void (*emit_intrinsic_load_ssbo)(struct ir3_context *ctx,
1707ec681f3Smrg                                    nir_intrinsic_instr *intr,
1717ec681f3Smrg                                    struct ir3_instruction **dst);
1727ec681f3Smrg   void (*emit_intrinsic_store_ssbo)(struct ir3_context *ctx,
1737ec681f3Smrg                                     nir_intrinsic_instr *intr);
1747ec681f3Smrg   struct ir3_instruction *(*emit_intrinsic_atomic_ssbo)(
1757ec681f3Smrg      struct ir3_context *ctx, nir_intrinsic_instr *intr);
1767ec681f3Smrg   void (*emit_intrinsic_load_image)(struct ir3_context *ctx,
1777ec681f3Smrg                                     nir_intrinsic_instr *intr,
1787ec681f3Smrg                                     struct ir3_instruction **dst);
1797ec681f3Smrg   void (*emit_intrinsic_store_image)(struct ir3_context *ctx,
1807ec681f3Smrg                                      nir_intrinsic_instr *intr);
1817ec681f3Smrg   struct ir3_instruction *(*emit_intrinsic_atomic_image)(
1827ec681f3Smrg      struct ir3_context *ctx, nir_intrinsic_instr *intr);
1837ec681f3Smrg   void (*emit_intrinsic_image_size)(struct ir3_context *ctx,
1847ec681f3Smrg                                     nir_intrinsic_instr *intr,
1857ec681f3Smrg                                     struct ir3_instruction **dst);
1867ec681f3Smrg   void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx,
1877ec681f3Smrg                                          nir_intrinsic_instr *intr,
1887ec681f3Smrg                                          struct ir3_instruction **dst);
1897ec681f3Smrg   void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx,
1907ec681f3Smrg                                           nir_intrinsic_instr *intr);
1917e102996Smaya};
1927e102996Smaya
1937e102996Smayaextern const struct ir3_context_funcs ir3_a4xx_funcs;
1947e102996Smayaextern const struct ir3_context_funcs ir3_a6xx_funcs;
1957e102996Smaya
/* Context creation and destruction; both are defined outside this header.
 * NOTE(review): presumably a context returned by ir3_context_init() must be
 * released with ir3_context_free() -- confirm against the implementation.
 */
struct ir3_context *ir3_context_init(struct ir3_compiler *compiler,
                                     struct ir3_shader_variant *so);
void ir3_context_free(struct ir3_context *ctx);
1997e102996Smaya
2007ec681f3Smrgstruct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx,
2017ec681f3Smrg                                         nir_ssa_def *dst, unsigned n);
2027ec681f3Smrgstruct ir3_instruction **ir3_get_dst(struct ir3_context *ctx, nir_dest *dst,
2037ec681f3Smrg                                     unsigned n);
2047ec681f3Smrgstruct ir3_instruction *const *ir3_get_src(struct ir3_context *ctx,
2057ec681f3Smrg                                           nir_src *src);
2067e102996Smayavoid ir3_put_dst(struct ir3_context *ctx, nir_dest *dst);
2077ec681f3Smrgstruct ir3_instruction *ir3_create_collect(struct ir3_block *block,
2087ec681f3Smrg                                           struct ir3_instruction *const *arr,
2097ec681f3Smrg                                           unsigned arrsz);
2107e102996Smayavoid ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
2117ec681f3Smrg                    struct ir3_instruction *src, unsigned base, unsigned n);
2127ec681f3Smrgvoid ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc);
2137ec681f3Smrgvoid ir3_handle_nonuniform(struct ir3_instruction *instr,
2147ec681f3Smrg                           nir_intrinsic_instr *intrin);
2157ec681f3Smrgvoid emit_intrinsic_image_size_tex(struct ir3_context *ctx,
2167ec681f3Smrg                                   nir_intrinsic_instr *intr,
2177ec681f3Smrg                                   struct ir3_instruction **dst);
2187ec681f3Smrg
/* Convenience wrapper around ir3_create_collect(): packs the variadic
 * instruction arguments into a temporary array and passes that array together
 * with its element count.  Evaluates to whatever ir3_create_collect() returns.
 *
 * Relies on the GNU statement-expression extension.
 */
#define ir3_collect(block, ...)                                                \
   ({                                                                          \
      struct ir3_instruction *__arr[] = {__VA_ARGS__};                         \
      ir3_create_collect(block, __arr, ARRAY_SIZE(__arr));                     \
   })
2247ec681f3Smrg
2257ec681f3SmrgNORETURN void ir3_context_error(struct ir3_context *ctx, const char *format,
2267ec681f3Smrg                                ...);
2277ec681f3Smrg
/* Compile-time-of-shader assertion: when `cond` is false, reports
 * "failed assert: <cond>" through ir3_context_error().
 *
 * The stringified condition is passed as a "%s" argument rather than being
 * pasted into the format string, so that conditions containing a '%'
 * (e.g. `x % 4 == 0`) are not misparsed as conversion specifications.
 */
#define compile_assert(ctx, cond)                                              \
   do {                                                                        \
      if (!(cond))                                                             \
         ir3_context_error((ctx), "failed assert: %s\n", #cond);               \
   } while (0)
2337ec681f3Smrg
/* Address-register and predicate helpers, defined outside this header.
 *
 * struct ir3_context keeps caches named addr0_ht (one table per alignment)
 * and addr1_ht (immediate values only).
 * NOTE(review): presumably ir3_get_addr0()/ir3_get_addr1() are the lookups
 * that consult those caches -- confirm against the implementation.
 */
struct ir3_instruction *ir3_get_addr0(struct ir3_context *ctx,
                                      struct ir3_instruction *src, int align);
struct ir3_instruction *ir3_get_addr1(struct ir3_context *ctx,
                                      unsigned const_val);
struct ir3_instruction *ir3_get_predicate(struct ir3_context *ctx,
                                          struct ir3_instruction *src);
2407e102996Smaya
2417e102996Smayavoid ir3_declare_array(struct ir3_context *ctx, nir_register *reg);
2427ec681f3Smrgstruct ir3_array *ir3_get_array(struct ir3_context *ctx, nir_register *reg);
2437e102996Smayastruct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx,
2447ec681f3Smrg                                              struct ir3_array *arr, int n,
2457ec681f3Smrg                                              struct ir3_instruction *address);
2467ec681f3Smrgvoid ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr,
2477ec681f3Smrg                            int n, struct ir3_instruction *src,
2487ec681f3Smrg                            struct ir3_instruction *address);
2497ec681f3Smrg
2507ec681f3Smrgstatic inline type_t
2517ec681f3Smrgutype_for_size(unsigned bit_size)
2527e102996Smaya{
2537ec681f3Smrg   switch (bit_size) {
2547ec681f3Smrg   case 32:
2557ec681f3Smrg      return TYPE_U32;
2567ec681f3Smrg   case 16:
2577ec681f3Smrg      return TYPE_U16;
2587ec681f3Smrg   case 8:
2597ec681f3Smrg      return TYPE_U8;
2607ec681f3Smrg   default:
2617ec681f3Smrg      unreachable("bad bitsize");
2627ec681f3Smrg      return ~0;
2637ec681f3Smrg   }
2647e102996Smaya}
2657e102996Smaya
2667ec681f3Smrgstatic inline type_t
2677ec681f3Smrgutype_src(nir_src src)
2687ec681f3Smrg{
2697ec681f3Smrg   return utype_for_size(nir_src_bit_size(src));
2707ec681f3Smrg}
2717e102996Smaya
2727ec681f3Smrgstatic inline type_t
2737ec681f3Smrgutype_dst(nir_dest dst)
2747ec681f3Smrg{
2757ec681f3Smrg   return utype_for_size(nir_dest_bit_size(dst));
2767ec681f3Smrg}
2777e102996Smaya
2787e102996Smaya#endif /* IR3_CONTEXT_H_ */
279