/*
 * Copyright (C) 2018-2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
237ec681f3Smrg
247ec681f3Smrg#ifndef __AGX_PUBLIC_H_
257ec681f3Smrg#define __AGX_PUBLIC_H_
267ec681f3Smrg
#include <stdbool.h>
#include <stdint.h>

#include "compiler/nir/nir.h"
#include "util/u_dynarray.h"
#include "asahi/lib/agx_pack.h"
307ec681f3Smrg
/* Kinds of data that can be pushed into the uniform file. Each struct agx_push
 * selects one of these as its contents. */
enum agx_push_type {
   /* For each buffer class (UBO, VBO, SSBO): an array of 64-bit pointers to
    * the base addresses (BASES) and an array of 16-bit sizes for optional
    * bounds checking (SIZES) */
   AGX_PUSH_UBO_BASES = 0,
   AGX_PUSH_UBO_SIZES = 1,
   AGX_PUSH_VBO_BASES = 2,
   AGX_PUSH_VBO_SIZES = 3,
   AGX_PUSH_SSBO_BASES = 4,
   AGX_PUSH_SSBO_SIZES = 5,

   /* Push the attached constant memory */
   AGX_PUSH_CONSTANTS = 6,

   /* Push the content of a UBO */
   AGX_PUSH_UBO_DATA = 7,

   /* RGBA blend constant (FP32) */
   AGX_PUSH_BLEND_CONST = 8,

   /* Number of push types, not a push type itself. Keep last. */
   AGX_PUSH_NUM_TYPES
};
537ec681f3Smrg
547ec681f3Smrgstruct agx_push {
557ec681f3Smrg   /* Contents to push */
567ec681f3Smrg   enum agx_push_type type : 8;
577ec681f3Smrg
587ec681f3Smrg   /* Base of where to push, indexed in 16-bit units. The uniform file contains
597ec681f3Smrg    * 512 = 2^9 such units. */
607ec681f3Smrg   unsigned base : 9;
617ec681f3Smrg
627ec681f3Smrg   /* Number of 16-bit units to push */
637ec681f3Smrg   unsigned length : 9;
647ec681f3Smrg
657ec681f3Smrg   /* If set, rather than pushing the specified data, push a pointer to the
667ec681f3Smrg    * specified data. This is slower to access but enables indirect access, as
677ec681f3Smrg    * the uniform file does not support indirection. */
687ec681f3Smrg   bool indirect : 1;
697ec681f3Smrg
707ec681f3Smrg   union {
717ec681f3Smrg      struct {
727ec681f3Smrg         uint16_t ubo;
737ec681f3Smrg         uint16_t offset;
747ec681f3Smrg      } ubo_data;
757ec681f3Smrg   };
767ec681f3Smrg};
777ec681f3Smrg
/* Capacity of the fixed-size arrays below. The values are arbitrary. */
#define AGX_MAX_PUSH_RANGES (16)
#define AGX_MAX_VARYINGS (32)
817ec681f3Smrg
827ec681f3Smrgstruct agx_varyings {
837ec681f3Smrg   unsigned nr_descs, nr_slots;
847ec681f3Smrg   struct agx_varying_packed packed[AGX_MAX_VARYINGS];
857ec681f3Smrg};
867ec681f3Smrg
877ec681f3Smrgstruct agx_shader_info {
887ec681f3Smrg   unsigned push_ranges;
897ec681f3Smrg   struct agx_push push[AGX_MAX_PUSH_RANGES];
907ec681f3Smrg   struct agx_varyings varyings;
917ec681f3Smrg
927ec681f3Smrg   /* Does the shader read the tilebuffer? */
937ec681f3Smrg   bool reads_tib;
947ec681f3Smrg
957ec681f3Smrg   /* Does the shader write point size? */
967ec681f3Smrg   bool writes_psiz;
977ec681f3Smrg};
987ec681f3Smrg
/* Array capacities used by the shader keys: render targets, vertex attributes
 * and vertex buffers respectively. */
#define AGX_MAX_RTS (8)
#define AGX_MAX_ATTRIBS (16)
#define AGX_MAX_VBUFS (16)
1027ec681f3Smrg
/* Data formats. The values are assigned explicitly and are not contiguous: 9
 * and 11 are unassigned. Every value must fit the 4-bit `format` bit-field of
 * struct agx_attribute.
 *
 * NOTE(review): the explicit numbering presumably mirrors a hardware encoding
 * -- confirm before renumbering. */
enum agx_format {
   AGX_FORMAT_I8 = 0,
   AGX_FORMAT_I16 = 1,
   AGX_FORMAT_I32 = 2,
   AGX_FORMAT_F16 = 3,
   AGX_FORMAT_U8NORM = 4,
   AGX_FORMAT_S8NORM = 5,
   AGX_FORMAT_U16NORM = 6,
   AGX_FORMAT_S16NORM = 7,
   AGX_FORMAT_RGB10A2 = 8,
   AGX_FORMAT_SRGBA8 = 10,
   AGX_FORMAT_RG11B10F = 12,
   AGX_FORMAT_RGB9E5 = 13,

   /* One past the largest assigned value. Because of the gaps above this is
    * an upper bound for table sizes, not the count of valid formats. Keep
    * last. */
   AGX_NUM_FORMATS,
};
1207ec681f3Smrg
1217ec681f3Smrg/* Returns the number of bits at the bottom of the address required to be zero.
1227ec681f3Smrg * That is, returns the base-2 logarithm of the minimum alignment for an
1237ec681f3Smrg * agx_format, where the minimum alignment is 2^n where n is the result of this
1247ec681f3Smrg * function. The offset argument to device_load is left-shifted by this amount
1257ec681f3Smrg * in the hardware */
1267ec681f3Smrg
1277ec681f3Smrgstatic inline unsigned
1287ec681f3Smrgagx_format_shift(enum agx_format format)
1297ec681f3Smrg{
1307ec681f3Smrg   switch (format) {
1317ec681f3Smrg   case AGX_FORMAT_I8:
1327ec681f3Smrg   case AGX_FORMAT_U8NORM:
1337ec681f3Smrg   case AGX_FORMAT_S8NORM:
1347ec681f3Smrg   case AGX_FORMAT_SRGBA8:
1357ec681f3Smrg      return 0;
1367ec681f3Smrg
1377ec681f3Smrg   case AGX_FORMAT_I16:
1387ec681f3Smrg   case AGX_FORMAT_F16:
1397ec681f3Smrg   case AGX_FORMAT_U16NORM:
1407ec681f3Smrg   case AGX_FORMAT_S16NORM:
1417ec681f3Smrg      return 1;
1427ec681f3Smrg
1437ec681f3Smrg   case AGX_FORMAT_I32:
1447ec681f3Smrg   case AGX_FORMAT_RGB10A2:
1457ec681f3Smrg   case AGX_FORMAT_RG11B10F:
1467ec681f3Smrg   case AGX_FORMAT_RGB9E5:
1477ec681f3Smrg      return 2;
1487ec681f3Smrg
1497ec681f3Smrg   default:
1507ec681f3Smrg      unreachable("invalid format");
1517ec681f3Smrg   }
1527ec681f3Smrg}
1537ec681f3Smrg
1547ec681f3Smrgstruct agx_attribute {
1557ec681f3Smrg   uint32_t divisor;
1567ec681f3Smrg
1577ec681f3Smrg   unsigned buf : 5;
1587ec681f3Smrg   unsigned src_offset : 16;
1597ec681f3Smrg   unsigned nr_comps_minus_1 : 2;
1607ec681f3Smrg   enum agx_format format : 4;
1617ec681f3Smrg   unsigned padding : 5;
1627ec681f3Smrg};
1637ec681f3Smrg
1647ec681f3Smrgstruct agx_vs_shader_key {
1657ec681f3Smrg   unsigned num_vbufs;
1667ec681f3Smrg   unsigned vbuf_strides[AGX_MAX_VBUFS];
1677ec681f3Smrg
1687ec681f3Smrg   struct agx_attribute attributes[AGX_MAX_ATTRIBS];
1697ec681f3Smrg
1707ec681f3Smrg   /* Set to true for clip coordinates to range [0, 1] instead of [-1, 1] */
1717ec681f3Smrg   bool clip_halfz : 1;
1727ec681f3Smrg};
1737ec681f3Smrg
1747ec681f3Smrgstruct agx_fs_shader_key {
1757ec681f3Smrg   enum agx_format tib_formats[AGX_MAX_RTS];
1767ec681f3Smrg};
1777ec681f3Smrg
1787ec681f3Smrgstruct agx_shader_key {
1797ec681f3Smrg   union {
1807ec681f3Smrg      struct agx_vs_shader_key vs;
1817ec681f3Smrg      struct agx_fs_shader_key fs;
1827ec681f3Smrg   };
1837ec681f3Smrg};
1847ec681f3Smrg
1857ec681f3Smrgvoid
1867ec681f3Smrgagx_compile_shader_nir(nir_shader *nir,
1877ec681f3Smrg      struct agx_shader_key *key,
1887ec681f3Smrg      struct util_dynarray *binary,
1897ec681f3Smrg      struct agx_shader_info *out);
1907ec681f3Smrg
1917ec681f3Smrgstatic const nir_shader_compiler_options agx_nir_options = {
1927ec681f3Smrg   .lower_scmp = true,
1937ec681f3Smrg   .lower_flrp16 = true,
1947ec681f3Smrg   .lower_flrp32 = true,
1957ec681f3Smrg   .lower_ffract = true,
1967ec681f3Smrg   .lower_fmod = true,
1977ec681f3Smrg   .lower_fdiv = true,
1987ec681f3Smrg   .lower_isign = true,
1997ec681f3Smrg   .lower_iabs = true,
2007ec681f3Smrg   .lower_fpow = true,
2017ec681f3Smrg   .lower_find_lsb = true,
2027ec681f3Smrg   .lower_ifind_msb = true,
2037ec681f3Smrg   .lower_fdph = true,
2047ec681f3Smrg   .lower_wpos_pntc = true,
2057ec681f3Smrg   .lower_fsign = true,
2067ec681f3Smrg   .lower_rotate = true,
2077ec681f3Smrg   .lower_pack_split = true,
2087ec681f3Smrg   .lower_insert_byte = true,
2097ec681f3Smrg   .lower_insert_word = true,
2107ec681f3Smrg   .lower_uniforms_to_ubo = true,
2117ec681f3Smrg   .lower_cs_local_index_from_id = true,
2127ec681f3Smrg
2137ec681f3Smrg   .lower_doubles_options = nir_lower_dmod,
2147ec681f3Smrg   .lower_int64_options = ~(nir_lower_iadd64 | nir_lower_imul_2x32_64),
2157ec681f3Smrg
2167ec681f3Smrg   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
2177ec681f3Smrg
2187ec681f3Smrg   .has_fsub = true,
2197ec681f3Smrg   .has_isub = true,
2207ec681f3Smrg   .has_cs_global_id = true,
2217ec681f3Smrg
2227ec681f3Smrg   .vectorize_io = true,
2237ec681f3Smrg   .fuse_ffma16 = true,
2247ec681f3Smrg   .fuse_ffma32 = true,
2257ec681f3Smrg   .use_interpolated_input_intrinsics = true,
2267ec681f3Smrg};
2277ec681f3Smrg
2287ec681f3Smrg#endif
229