17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2021 Collabora, Ltd. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg */ 247ec681f3Smrg 257ec681f3Smrg#ifndef __PAN_SHADER_H__ 267ec681f3Smrg#define __PAN_SHADER_H__ 277ec681f3Smrg 287ec681f3Smrg#include "compiler/nir/nir.h" 297ec681f3Smrg#include "panfrost/util/pan_ir.h" 307ec681f3Smrg 317ec681f3Smrg#include "pan_device.h" 327ec681f3Smrg#include "genxml/gen_macros.h" 337ec681f3Smrg 347ec681f3Smrgstruct panfrost_device; 357ec681f3Smrg 367ec681f3Smrg#ifdef PAN_ARCH 377ec681f3Smrgconst nir_shader_compiler_options * 387ec681f3SmrgGENX(pan_shader_get_compiler_options)(void); 397ec681f3Smrg 407ec681f3Smrgvoid 417ec681f3SmrgGENX(pan_shader_compile)(nir_shader *nir, 427ec681f3Smrg struct panfrost_compile_inputs *inputs, 437ec681f3Smrg struct util_dynarray *binary, 447ec681f3Smrg struct pan_shader_info *info); 457ec681f3Smrg 467ec681f3Smrg#if PAN_ARCH <= 5 477ec681f3Smrgstatic inline void 487ec681f3Smrgpan_shader_prepare_midgard_rsd(const struct pan_shader_info *info, 497ec681f3Smrg struct MALI_RENDERER_STATE *rsd) 507ec681f3Smrg{ 517ec681f3Smrg assert((info->push.count & 3) == 0); 527ec681f3Smrg 537ec681f3Smrg rsd->properties.uniform_count = info->push.count / 4; 547ec681f3Smrg rsd->properties.shader_has_side_effects = info->writes_global; 557ec681f3Smrg rsd->properties.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED; 567ec681f3Smrg 577ec681f3Smrg /* For fragment shaders, work register count, early-z, reads at draw-time */ 587ec681f3Smrg 597ec681f3Smrg if (info->stage != MESA_SHADER_FRAGMENT) { 607ec681f3Smrg rsd->properties.work_register_count = info->work_reg_count; 617ec681f3Smrg } else { 627ec681f3Smrg rsd->properties.shader_reads_tilebuffer = 637ec681f3Smrg info->fs.outputs_read; 647ec681f3Smrg 657ec681f3Smrg /* However, forcing early-z in the shader overrides draw-time */ 667ec681f3Smrg rsd->properties.force_early_z = 677ec681f3Smrg info->fs.early_fragment_tests; 687ec681f3Smrg } 697ec681f3Smrg} 707ec681f3Smrg 717ec681f3Smrg#else 727ec681f3Smrg 737ec681f3Smrg/* Classify a shader into the following pixel kill categories: 747ec681f3Smrg * 757ec681f3Smrg * (force early, strong early): no side effects/depth/stencil/coverage writes (force) 767ec681f3Smrg * (weak early, weak early): no side effects/depth/stencil/coverage writes 777ec681f3Smrg * (weak early, force late): no side effects/depth/stencil writes 787ec681f3Smrg * (force late, weak early): side effects but no depth/stencil/coverage writes 797ec681f3Smrg * (force late, force early): only run for side effects 807ec681f3Smrg * (force late, force late): depth/stencil writes 817ec681f3Smrg * 827ec681f3Smrg * Note that discard is considered a coverage write. TODO: what about 837ec681f3Smrg * alpha-to-coverage? 847ec681f3Smrg * */ 857ec681f3Smrg 867ec681f3Smrg#define SET_PIXEL_KILL(kill, update) do { \ 877ec681f3Smrg rsd->properties.pixel_kill_operation = MALI_PIXEL_KILL_## kill; \ 887ec681f3Smrg rsd->properties.zs_update_operation = MALI_PIXEL_KILL_## update; \ 897ec681f3Smrg} while(0) 907ec681f3Smrg 917ec681f3Smrgstatic inline void 927ec681f3Smrgpan_shader_classify_pixel_kill_coverage(const struct pan_shader_info *info, 937ec681f3Smrg struct MALI_RENDERER_STATE *rsd) 947ec681f3Smrg{ 957ec681f3Smrg bool force_early = info->fs.early_fragment_tests; 967ec681f3Smrg bool sidefx = info->writes_global; 977ec681f3Smrg bool coverage = info->fs.writes_coverage || info->fs.can_discard; 987ec681f3Smrg bool depth = info->fs.writes_depth; 997ec681f3Smrg bool stencil = info->fs.writes_stencil; 1007ec681f3Smrg 1017ec681f3Smrg rsd->properties.shader_modifies_coverage = coverage; 1027ec681f3Smrg 1037ec681f3Smrg if (force_early) 1047ec681f3Smrg SET_PIXEL_KILL(FORCE_EARLY, STRONG_EARLY); 1057ec681f3Smrg else if (depth || stencil || (sidefx && coverage)) 1067ec681f3Smrg SET_PIXEL_KILL(FORCE_LATE, FORCE_LATE); 1077ec681f3Smrg else if (sidefx) 1087ec681f3Smrg SET_PIXEL_KILL(FORCE_LATE, WEAK_EARLY); 1097ec681f3Smrg else if (coverage) 1107ec681f3Smrg SET_PIXEL_KILL(WEAK_EARLY, FORCE_LATE); 1117ec681f3Smrg else 1127ec681f3Smrg SET_PIXEL_KILL(WEAK_EARLY, WEAK_EARLY); 1137ec681f3Smrg} 1147ec681f3Smrg 1157ec681f3Smrg#undef SET_PIXEL_KILL 1167ec681f3Smrg 1177ec681f3Smrgstatic inline void 1187ec681f3Smrgpan_shader_prepare_bifrost_rsd(const struct pan_shader_info *info, 1197ec681f3Smrg struct MALI_RENDERER_STATE *rsd) 1207ec681f3Smrg{ 1217ec681f3Smrg unsigned fau_count = DIV_ROUND_UP(info->push.count, 2); 1227ec681f3Smrg rsd->preload.uniform_count = fau_count; 1237ec681f3Smrg 1247ec681f3Smrg#if PAN_ARCH >= 7 1257ec681f3Smrg rsd->properties.shader_register_allocation = 1267ec681f3Smrg (info->work_reg_count <= 32) ? 1277ec681f3Smrg MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD : 1287ec681f3Smrg MALI_SHADER_REGISTER_ALLOCATION_64_PER_THREAD; 1297ec681f3Smrg#endif 1307ec681f3Smrg 1317ec681f3Smrg switch (info->stage) { 1327ec681f3Smrg case MESA_SHADER_VERTEX: 1337ec681f3Smrg rsd->preload.vertex.vertex_id = true; 1347ec681f3Smrg rsd->preload.vertex.instance_id = true; 1357ec681f3Smrg break; 1367ec681f3Smrg 1377ec681f3Smrg case MESA_SHADER_FRAGMENT: 1387ec681f3Smrg pan_shader_classify_pixel_kill_coverage(info, rsd); 1397ec681f3Smrg 1407ec681f3Smrg#if PAN_ARCH >= 7 1417ec681f3Smrg rsd->properties.shader_wait_dependency_6 = info->bifrost.wait_6; 1427ec681f3Smrg rsd->properties.shader_wait_dependency_7 = info->bifrost.wait_7; 1437ec681f3Smrg#endif 1447ec681f3Smrg 1457ec681f3Smrg /* Match the mesa/st convention. If this needs to be flipped, 1467ec681f3Smrg * nir_lower_pntc_ytransform will do so. */ 1477ec681f3Smrg rsd->properties.point_sprite_coord_origin_max_y = true; 1487ec681f3Smrg 1497ec681f3Smrg rsd->properties.allow_forward_pixel_to_be_killed = 1507ec681f3Smrg !info->fs.sidefx; 1517ec681f3Smrg 1527ec681f3Smrg rsd->preload.fragment.fragment_position = info->fs.reads_frag_coord; 1537ec681f3Smrg rsd->preload.fragment.coverage = true; 1547ec681f3Smrg rsd->preload.fragment.primitive_flags = info->fs.reads_face; 1557ec681f3Smrg 1567ec681f3Smrg /* Contains sample ID and sample mask. Sample position and 1577ec681f3Smrg * helper invocation are expressed in terms of the above, so 1587ec681f3Smrg * preload for those too */ 1597ec681f3Smrg rsd->preload.fragment.sample_mask_id = 1607ec681f3Smrg info->fs.reads_sample_id | 1617ec681f3Smrg info->fs.reads_sample_pos | 1627ec681f3Smrg info->fs.reads_sample_mask_in | 1637ec681f3Smrg info->fs.reads_helper_invocation | 1647ec681f3Smrg info->fs.sample_shading; 1657ec681f3Smrg 1667ec681f3Smrg#if PAN_ARCH >= 7 1677ec681f3Smrg rsd->message_preload_1 = info->bifrost.messages[0]; 1687ec681f3Smrg rsd->message_preload_2 = info->bifrost.messages[1]; 1697ec681f3Smrg#endif 1707ec681f3Smrg break; 1717ec681f3Smrg 1727ec681f3Smrg case MESA_SHADER_COMPUTE: 1737ec681f3Smrg rsd->preload.compute.local_invocation_xy = true; 1747ec681f3Smrg rsd->preload.compute.local_invocation_z = true; 1757ec681f3Smrg rsd->preload.compute.work_group_x = true; 1767ec681f3Smrg rsd->preload.compute.work_group_y = true; 1777ec681f3Smrg rsd->preload.compute.work_group_z = true; 1787ec681f3Smrg rsd->preload.compute.global_invocation_x = true; 1797ec681f3Smrg rsd->preload.compute.global_invocation_y = true; 1807ec681f3Smrg rsd->preload.compute.global_invocation_z = true; 1817ec681f3Smrg break; 1827ec681f3Smrg 1837ec681f3Smrg default: 1847ec681f3Smrg unreachable("TODO"); 1857ec681f3Smrg } 1867ec681f3Smrg} 1877ec681f3Smrg 1887ec681f3Smrg#endif 1897ec681f3Smrg 1907ec681f3Smrgstatic inline void 1917ec681f3Smrgpan_shader_prepare_rsd(const struct pan_shader_info *shader_info, 1927ec681f3Smrg mali_ptr shader_ptr, 1937ec681f3Smrg struct MALI_RENDERER_STATE *rsd) 1947ec681f3Smrg{ 1957ec681f3Smrg#if PAN_ARCH <= 5 1967ec681f3Smrg shader_ptr |= shader_info->midgard.first_tag; 1977ec681f3Smrg#endif 1987ec681f3Smrg 1997ec681f3Smrg rsd->shader.shader = shader_ptr; 2007ec681f3Smrg rsd->shader.attribute_count = shader_info->attribute_count; 2017ec681f3Smrg rsd->shader.varying_count = shader_info->varyings.input_count + 2027ec681f3Smrg shader_info->varyings.output_count; 2037ec681f3Smrg rsd->shader.texture_count = shader_info->texture_count; 2047ec681f3Smrg rsd->shader.sampler_count = shader_info->sampler_count; 2057ec681f3Smrg rsd->properties.shader_contains_barrier = shader_info->contains_barrier; 2067ec681f3Smrg rsd->properties.uniform_buffer_count = shader_info->ubo_count; 2077ec681f3Smrg 2087ec681f3Smrg if (shader_info->stage == MESA_SHADER_FRAGMENT) { 2097ec681f3Smrg rsd->properties.shader_contains_barrier |= 2107ec681f3Smrg shader_info->fs.helper_invocations; 2117ec681f3Smrg rsd->properties.stencil_from_shader = 2127ec681f3Smrg shader_info->fs.writes_stencil; 2137ec681f3Smrg rsd->properties.depth_source = 2147ec681f3Smrg shader_info->fs.writes_depth ? 2157ec681f3Smrg MALI_DEPTH_SOURCE_SHADER : 2167ec681f3Smrg MALI_DEPTH_SOURCE_FIXED_FUNCTION; 2177ec681f3Smrg 2187ec681f3Smrg /* This also needs to be set if the API forces per-sample 2197ec681f3Smrg * shading, but that'll just got ORed in */ 2207ec681f3Smrg rsd->multisample_misc.evaluate_per_sample = 2217ec681f3Smrg shader_info->fs.sample_shading; 2227ec681f3Smrg } 2237ec681f3Smrg 2247ec681f3Smrg#if PAN_ARCH >= 6 2257ec681f3Smrg pan_shader_prepare_bifrost_rsd(shader_info, rsd); 2267ec681f3Smrg#else 2277ec681f3Smrg pan_shader_prepare_midgard_rsd(shader_info, rsd); 2287ec681f3Smrg#endif 2297ec681f3Smrg} 2307ec681f3Smrg#endif /* PAN_ARCH */ 2317ec681f3Smrg 2327ec681f3Smrg#endif 233