17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2018-2021 Alyssa Rosenzweig <alyssa@rosenzweig.io> 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#ifndef __AGX_PUBLIC_H_ 257ec681f3Smrg#define __AGX_PUBLIC_H_ 267ec681f3Smrg 277ec681f3Smrg#include "compiler/nir/nir.h" 287ec681f3Smrg#include "util/u_dynarray.h" 297ec681f3Smrg#include "asahi/lib/agx_pack.h" 307ec681f3Smrg 317ec681f3Smrgenum agx_push_type { 327ec681f3Smrg /* Array of 64-bit pointers to the base addresses (BASES) and array of 337ec681f3Smrg * 16-bit sizes for optional bounds checking (SIZES) */ 347ec681f3Smrg AGX_PUSH_UBO_BASES = 0, 357ec681f3Smrg AGX_PUSH_UBO_SIZES = 1, 367ec681f3Smrg AGX_PUSH_VBO_BASES = 2, 377ec681f3Smrg AGX_PUSH_VBO_SIZES = 3, 387ec681f3Smrg AGX_PUSH_SSBO_BASES = 4, 397ec681f3Smrg AGX_PUSH_SSBO_SIZES = 5, 407ec681f3Smrg 417ec681f3Smrg /* Push the attached constant memory */ 427ec681f3Smrg AGX_PUSH_CONSTANTS = 6, 437ec681f3Smrg 447ec681f3Smrg /* Push the content of a UBO */ 457ec681f3Smrg AGX_PUSH_UBO_DATA = 7, 467ec681f3Smrg 477ec681f3Smrg /* RGBA blend constant (FP32) */ 487ec681f3Smrg AGX_PUSH_BLEND_CONST = 8, 497ec681f3Smrg 507ec681f3Smrg /* Keep last */ 517ec681f3Smrg AGX_PUSH_NUM_TYPES 527ec681f3Smrg}; 537ec681f3Smrg 547ec681f3Smrgstruct agx_push { 557ec681f3Smrg /* Contents to push */ 567ec681f3Smrg enum agx_push_type type : 8; 577ec681f3Smrg 587ec681f3Smrg /* Base of where to push, indexed in 16-bit units. The uniform file contains 597ec681f3Smrg * 512 = 2^9 such units. */ 607ec681f3Smrg unsigned base : 9; 617ec681f3Smrg 627ec681f3Smrg /* Number of 16-bit units to push */ 637ec681f3Smrg unsigned length : 9; 647ec681f3Smrg 657ec681f3Smrg /* If set, rather than pushing the specified data, push a pointer to the 667ec681f3Smrg * specified data. This is slower to access but enables indirect access, as 677ec681f3Smrg * the uniform file does not support indirection. */ 687ec681f3Smrg bool indirect : 1; 697ec681f3Smrg 707ec681f3Smrg union { 717ec681f3Smrg struct { 727ec681f3Smrg uint16_t ubo; 737ec681f3Smrg uint16_t offset; 747ec681f3Smrg } ubo_data; 757ec681f3Smrg }; 767ec681f3Smrg}; 777ec681f3Smrg 787ec681f3Smrg/* Arbitrary */ 797ec681f3Smrg#define AGX_MAX_PUSH_RANGES (16) 807ec681f3Smrg#define AGX_MAX_VARYINGS (32) 817ec681f3Smrg 827ec681f3Smrgstruct agx_varyings { 837ec681f3Smrg unsigned nr_descs, nr_slots; 847ec681f3Smrg struct agx_varying_packed packed[AGX_MAX_VARYINGS]; 857ec681f3Smrg}; 867ec681f3Smrg 877ec681f3Smrgstruct agx_shader_info { 887ec681f3Smrg unsigned push_ranges; 897ec681f3Smrg struct agx_push push[AGX_MAX_PUSH_RANGES]; 907ec681f3Smrg struct agx_varyings varyings; 917ec681f3Smrg 927ec681f3Smrg /* Does the shader read the tilebuffer? */ 937ec681f3Smrg bool reads_tib; 947ec681f3Smrg 957ec681f3Smrg /* Does the shader write point size? */ 967ec681f3Smrg bool writes_psiz; 977ec681f3Smrg}; 987ec681f3Smrg 997ec681f3Smrg#define AGX_MAX_RTS (8) 1007ec681f3Smrg#define AGX_MAX_ATTRIBS (16) 1017ec681f3Smrg#define AGX_MAX_VBUFS (16) 1027ec681f3Smrg 1037ec681f3Smrgenum agx_format { 1047ec681f3Smrg AGX_FORMAT_I8 = 0, 1057ec681f3Smrg AGX_FORMAT_I16 = 1, 1067ec681f3Smrg AGX_FORMAT_I32 = 2, 1077ec681f3Smrg AGX_FORMAT_F16 = 3, 1087ec681f3Smrg AGX_FORMAT_U8NORM = 4, 1097ec681f3Smrg AGX_FORMAT_S8NORM = 5, 1107ec681f3Smrg AGX_FORMAT_U16NORM = 6, 1117ec681f3Smrg AGX_FORMAT_S16NORM = 7, 1127ec681f3Smrg AGX_FORMAT_RGB10A2 = 8, 1137ec681f3Smrg AGX_FORMAT_SRGBA8 = 10, 1147ec681f3Smrg AGX_FORMAT_RG11B10F = 12, 1157ec681f3Smrg AGX_FORMAT_RGB9E5 = 13, 1167ec681f3Smrg 1177ec681f3Smrg /* Keep last */ 1187ec681f3Smrg AGX_NUM_FORMATS, 1197ec681f3Smrg}; 1207ec681f3Smrg 1217ec681f3Smrg/* Returns the number of bits at the bottom of the address required to be zero. 1227ec681f3Smrg * That is, returns the base-2 logarithm of the minimum alignment for an 1237ec681f3Smrg * agx_format, where the minimum alignment is 2^n where n is the result of this 1247ec681f3Smrg * function. The offset argument to device_load is left-shifted by this amount 1257ec681f3Smrg * in the hardware */ 1267ec681f3Smrg 1277ec681f3Smrgstatic inline unsigned 1287ec681f3Smrgagx_format_shift(enum agx_format format) 1297ec681f3Smrg{ 1307ec681f3Smrg switch (format) { 1317ec681f3Smrg case AGX_FORMAT_I8: 1327ec681f3Smrg case AGX_FORMAT_U8NORM: 1337ec681f3Smrg case AGX_FORMAT_S8NORM: 1347ec681f3Smrg case AGX_FORMAT_SRGBA8: 1357ec681f3Smrg return 0; 1367ec681f3Smrg 1377ec681f3Smrg case AGX_FORMAT_I16: 1387ec681f3Smrg case AGX_FORMAT_F16: 1397ec681f3Smrg case AGX_FORMAT_U16NORM: 1407ec681f3Smrg case AGX_FORMAT_S16NORM: 1417ec681f3Smrg return 1; 1427ec681f3Smrg 1437ec681f3Smrg case AGX_FORMAT_I32: 1447ec681f3Smrg case AGX_FORMAT_RGB10A2: 1457ec681f3Smrg case AGX_FORMAT_RG11B10F: 1467ec681f3Smrg case AGX_FORMAT_RGB9E5: 1477ec681f3Smrg return 2; 1487ec681f3Smrg 1497ec681f3Smrg default: 1507ec681f3Smrg unreachable("invalid format"); 1517ec681f3Smrg } 1527ec681f3Smrg} 1537ec681f3Smrg 1547ec681f3Smrgstruct agx_attribute { 1557ec681f3Smrg uint32_t divisor; 1567ec681f3Smrg 1577ec681f3Smrg unsigned buf : 5; 1587ec681f3Smrg unsigned src_offset : 16; 1597ec681f3Smrg unsigned nr_comps_minus_1 : 2; 1607ec681f3Smrg enum agx_format format : 4; 1617ec681f3Smrg unsigned padding : 5; 1627ec681f3Smrg}; 1637ec681f3Smrg 1647ec681f3Smrgstruct agx_vs_shader_key { 1657ec681f3Smrg unsigned num_vbufs; 1667ec681f3Smrg unsigned vbuf_strides[AGX_MAX_VBUFS]; 1677ec681f3Smrg 1687ec681f3Smrg struct agx_attribute attributes[AGX_MAX_ATTRIBS]; 1697ec681f3Smrg 1707ec681f3Smrg /* Set to true for clip coordinates to range [0, 1] instead of [-1, 1] */ 1717ec681f3Smrg bool clip_halfz : 1; 1727ec681f3Smrg}; 1737ec681f3Smrg 1747ec681f3Smrgstruct agx_fs_shader_key { 1757ec681f3Smrg enum agx_format tib_formats[AGX_MAX_RTS]; 1767ec681f3Smrg}; 1777ec681f3Smrg 1787ec681f3Smrgstruct agx_shader_key { 1797ec681f3Smrg union { 1807ec681f3Smrg struct agx_vs_shader_key vs; 1817ec681f3Smrg struct agx_fs_shader_key fs; 1827ec681f3Smrg }; 1837ec681f3Smrg}; 1847ec681f3Smrg 1857ec681f3Smrgvoid 1867ec681f3Smrgagx_compile_shader_nir(nir_shader *nir, 1877ec681f3Smrg struct agx_shader_key *key, 1887ec681f3Smrg struct util_dynarray *binary, 1897ec681f3Smrg struct agx_shader_info *out); 1907ec681f3Smrg 1917ec681f3Smrgstatic const nir_shader_compiler_options agx_nir_options = { 1927ec681f3Smrg .lower_scmp = true, 1937ec681f3Smrg .lower_flrp16 = true, 1947ec681f3Smrg .lower_flrp32 = true, 1957ec681f3Smrg .lower_ffract = true, 1967ec681f3Smrg .lower_fmod = true, 1977ec681f3Smrg .lower_fdiv = true, 1987ec681f3Smrg .lower_isign = true, 1997ec681f3Smrg .lower_iabs = true, 2007ec681f3Smrg .lower_fpow = true, 2017ec681f3Smrg .lower_find_lsb = true, 2027ec681f3Smrg .lower_ifind_msb = true, 2037ec681f3Smrg .lower_fdph = true, 2047ec681f3Smrg .lower_wpos_pntc = true, 2057ec681f3Smrg .lower_fsign = true, 2067ec681f3Smrg .lower_rotate = true, 2077ec681f3Smrg .lower_pack_split = true, 2087ec681f3Smrg .lower_insert_byte = true, 2097ec681f3Smrg .lower_insert_word = true, 2107ec681f3Smrg .lower_uniforms_to_ubo = true, 2117ec681f3Smrg .lower_cs_local_index_from_id = true, 2127ec681f3Smrg 2137ec681f3Smrg .lower_doubles_options = nir_lower_dmod, 2147ec681f3Smrg .lower_int64_options = ~(nir_lower_iadd64 | nir_lower_imul_2x32_64), 2157ec681f3Smrg 2167ec681f3Smrg .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), 2177ec681f3Smrg 2187ec681f3Smrg .has_fsub = true, 2197ec681f3Smrg .has_isub = true, 2207ec681f3Smrg .has_cs_global_id = true, 2217ec681f3Smrg 2227ec681f3Smrg .vectorize_io = true, 2237ec681f3Smrg .fuse_ffma16 = true, 2247ec681f3Smrg .fuse_ffma32 = true, 2257ec681f3Smrg .use_interpolated_input_intrinsics = true, 2267ec681f3Smrg}; 2277ec681f3Smrg 2287ec681f3Smrg#endif 229