17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2018-2020 Collabora, Ltd. 37ec681f3Smrg * Copyright (C) 2019-2020 Icecream95 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 217ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 227ec681f3Smrg * SOFTWARE. 237ec681f3Smrg */ 247ec681f3Smrg 257ec681f3Smrg#include "pan_ir.h" 267ec681f3Smrg#include "compiler/nir/nir_builder.h" 277ec681f3Smrg 287ec681f3Smrg/* Midgard can write all of color, depth and stencil in a single writeout 297ec681f3Smrg * operation, so we merge depth/stencil stores with color stores. 307ec681f3Smrg * If there are no color stores, we add a write to the "depth RT". 317ec681f3Smrg * 327ec681f3Smrg * For Bifrost, we want these combined so we can properly order 337ec681f3Smrg * +ZS_EMIT with respect to +ATEST and +BLEND, as well as combining 347ec681f3Smrg * depth/stencil stores into a single +ZS_EMIT op. 357ec681f3Smrg */ 367ec681f3Smrgbool 377ec681f3Smrgpan_nir_lower_zs_store(nir_shader *nir) 387ec681f3Smrg{ 397ec681f3Smrg if (nir->info.stage != MESA_SHADER_FRAGMENT) 407ec681f3Smrg return false; 417ec681f3Smrg 427ec681f3Smrg nir_variable *z_var = NULL, *s_var = NULL; 437ec681f3Smrg 447ec681f3Smrg nir_foreach_shader_out_variable(var, nir) { 457ec681f3Smrg if (var->data.location == FRAG_RESULT_DEPTH) 467ec681f3Smrg z_var = var; 477ec681f3Smrg else if (var->data.location == FRAG_RESULT_STENCIL) 487ec681f3Smrg s_var = var; 497ec681f3Smrg } 507ec681f3Smrg 517ec681f3Smrg if (!z_var && !s_var) 527ec681f3Smrg return false; 537ec681f3Smrg 547ec681f3Smrg bool progress = false; 557ec681f3Smrg 567ec681f3Smrg nir_foreach_function(function, nir) { 577ec681f3Smrg if (!function->impl) continue; 587ec681f3Smrg 597ec681f3Smrg nir_intrinsic_instr *z_store = NULL, *s_store = NULL; 607ec681f3Smrg 617ec681f3Smrg nir_foreach_block(block, function->impl) { 627ec681f3Smrg nir_foreach_instr_safe(instr, block) { 637ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 647ec681f3Smrg continue; 657ec681f3Smrg 667ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 677ec681f3Smrg if (intr->intrinsic != nir_intrinsic_store_output) 687ec681f3Smrg continue; 697ec681f3Smrg 707ec681f3Smrg if (z_var && nir_intrinsic_base(intr) == z_var->data.driver_location) { 717ec681f3Smrg assert(!z_store); 727ec681f3Smrg z_store = intr; 737ec681f3Smrg } 747ec681f3Smrg 757ec681f3Smrg if (s_var && nir_intrinsic_base(intr) == s_var->data.driver_location) { 767ec681f3Smrg assert(!s_store); 777ec681f3Smrg s_store = intr; 787ec681f3Smrg } 797ec681f3Smrg } 807ec681f3Smrg } 817ec681f3Smrg 827ec681f3Smrg if (!z_store && !s_store) continue; 837ec681f3Smrg 847ec681f3Smrg bool replaced = false; 857ec681f3Smrg 867ec681f3Smrg nir_foreach_block(block, function->impl) { 877ec681f3Smrg nir_foreach_instr_safe(instr, block) { 887ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 897ec681f3Smrg continue; 907ec681f3Smrg 917ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 927ec681f3Smrg if (intr->intrinsic != nir_intrinsic_store_output) 937ec681f3Smrg continue; 947ec681f3Smrg 957ec681f3Smrg const nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_out, nir_intrinsic_base(intr)); 967ec681f3Smrg assert(var); 977ec681f3Smrg 987ec681f3Smrg if (var->data.location < FRAG_RESULT_DATA0) 997ec681f3Smrg continue; 1007ec681f3Smrg 1017ec681f3Smrg if (var->data.index) 1027ec681f3Smrg continue; 1037ec681f3Smrg 1047ec681f3Smrg assert(nir_src_is_const(intr->src[1]) && "no indirect outputs"); 1057ec681f3Smrg 1067ec681f3Smrg nir_builder b; 1077ec681f3Smrg nir_builder_init(&b, function->impl); 1087ec681f3Smrg 1097ec681f3Smrg assert(!z_store || z_store->instr.block == instr->block); 1107ec681f3Smrg assert(!s_store || s_store->instr.block == instr->block); 1117ec681f3Smrg b.cursor = nir_after_block_before_jump(instr->block); 1127ec681f3Smrg 1137ec681f3Smrg nir_intrinsic_instr *combined_store; 1147ec681f3Smrg combined_store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_combined_output_pan); 1157ec681f3Smrg 1167ec681f3Smrg combined_store->num_components = intr->src[0].ssa->num_components; 1177ec681f3Smrg 1187ec681f3Smrg nir_intrinsic_set_base(combined_store, nir_intrinsic_base(intr)); 1197ec681f3Smrg nir_intrinsic_set_src_type(combined_store, nir_intrinsic_src_type(intr)); 1207ec681f3Smrg 1217ec681f3Smrg unsigned writeout = PAN_WRITEOUT_C; 1227ec681f3Smrg if (z_store) 1237ec681f3Smrg writeout |= PAN_WRITEOUT_Z; 1247ec681f3Smrg if (s_store) 1257ec681f3Smrg writeout |= PAN_WRITEOUT_S; 1267ec681f3Smrg 1277ec681f3Smrg nir_intrinsic_set_component(combined_store, writeout); 1287ec681f3Smrg 1297ec681f3Smrg struct nir_ssa_def *zero = nir_imm_int(&b, 0); 1307ec681f3Smrg 1317ec681f3Smrg struct nir_ssa_def *src[4] = { 1327ec681f3Smrg intr->src[0].ssa, 1337ec681f3Smrg intr->src[1].ssa, 1347ec681f3Smrg z_store ? z_store->src[0].ssa : zero, 1357ec681f3Smrg s_store ? s_store->src[0].ssa : zero, 1367ec681f3Smrg }; 1377ec681f3Smrg 1387ec681f3Smrg for (int i = 0; i < 4; ++i) 1397ec681f3Smrg combined_store->src[i] = nir_src_for_ssa(src[i]); 1407ec681f3Smrg 1417ec681f3Smrg nir_builder_instr_insert(&b, &combined_store->instr); 1427ec681f3Smrg 1437ec681f3Smrg nir_instr_remove(instr); 1447ec681f3Smrg 1457ec681f3Smrg replaced = true; 1467ec681f3Smrg } 1477ec681f3Smrg } 1487ec681f3Smrg 1497ec681f3Smrg /* Insert a store to the depth RT (0xff) if needed */ 1507ec681f3Smrg if (!replaced) { 1517ec681f3Smrg nir_builder b; 1527ec681f3Smrg nir_builder_init(&b, function->impl); 1537ec681f3Smrg 1547ec681f3Smrg nir_block *block = NULL; 1557ec681f3Smrg if (z_store && s_store) 1567ec681f3Smrg assert(z_store->instr.block == s_store->instr.block); 1577ec681f3Smrg 1587ec681f3Smrg if (z_store) 1597ec681f3Smrg block = z_store->instr.block; 1607ec681f3Smrg else 1617ec681f3Smrg block = s_store->instr.block; 1627ec681f3Smrg 1637ec681f3Smrg b.cursor = nir_after_block_before_jump(block); 1647ec681f3Smrg 1657ec681f3Smrg nir_intrinsic_instr *combined_store; 1667ec681f3Smrg combined_store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_combined_output_pan); 1677ec681f3Smrg 1687ec681f3Smrg combined_store->num_components = 4; 1697ec681f3Smrg 1707ec681f3Smrg unsigned base; 1717ec681f3Smrg if (z_store) 1727ec681f3Smrg base = nir_intrinsic_base(z_store); 1737ec681f3Smrg else 1747ec681f3Smrg base = nir_intrinsic_base(s_store); 1757ec681f3Smrg nir_intrinsic_set_base(combined_store, base); 1767ec681f3Smrg nir_intrinsic_set_src_type(combined_store, nir_type_float32); 1777ec681f3Smrg 1787ec681f3Smrg unsigned writeout = 0; 1797ec681f3Smrg if (z_store) 1807ec681f3Smrg writeout |= PAN_WRITEOUT_Z; 1817ec681f3Smrg if (s_store) 1827ec681f3Smrg writeout |= PAN_WRITEOUT_S; 1837ec681f3Smrg 1847ec681f3Smrg nir_intrinsic_set_component(combined_store, writeout); 1857ec681f3Smrg 1867ec681f3Smrg struct nir_ssa_def *zero = nir_imm_int(&b, 0); 1877ec681f3Smrg 1887ec681f3Smrg struct nir_ssa_def *src[4] = { 1897ec681f3Smrg nir_imm_vec4(&b, 0, 0, 0, 0), 1907ec681f3Smrg zero, 1917ec681f3Smrg z_store ? z_store->src[0].ssa : zero, 1927ec681f3Smrg s_store ? s_store->src[0].ssa : zero, 1937ec681f3Smrg }; 1947ec681f3Smrg 1957ec681f3Smrg for (int i = 0; i < 4; ++i) 1967ec681f3Smrg combined_store->src[i] = nir_src_for_ssa(src[i]); 1977ec681f3Smrg 1987ec681f3Smrg nir_builder_instr_insert(&b, &combined_store->instr); 1997ec681f3Smrg } 2007ec681f3Smrg 2017ec681f3Smrg if (z_store) 2027ec681f3Smrg nir_instr_remove(&z_store->instr); 2037ec681f3Smrg 2047ec681f3Smrg if (s_store) 2057ec681f3Smrg nir_instr_remove(&s_store->instr); 2067ec681f3Smrg 2077ec681f3Smrg nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); 2087ec681f3Smrg progress = true; 2097ec681f3Smrg } 2107ec681f3Smrg 2117ec681f3Smrg return progress; 2127ec681f3Smrg} 2137ec681f3Smrg 2147ec681f3Smrg/* Real writeout stores, which break execution, need to be moved to after 2157ec681f3Smrg * dual-source stores, which are just standard register writes. */ 2167ec681f3Smrgbool 2177ec681f3Smrgpan_nir_reorder_writeout(nir_shader *nir) 2187ec681f3Smrg{ 2197ec681f3Smrg bool progress = false; 2207ec681f3Smrg 2217ec681f3Smrg nir_foreach_function(function, nir) { 2227ec681f3Smrg if (!function->impl) continue; 2237ec681f3Smrg 2247ec681f3Smrg nir_foreach_block(block, function->impl) { 2257ec681f3Smrg nir_instr *last_writeout = NULL; 2267ec681f3Smrg 2277ec681f3Smrg nir_foreach_instr_reverse_safe(instr, block) { 2287ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 2297ec681f3Smrg continue; 2307ec681f3Smrg 2317ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 2327ec681f3Smrg if (intr->intrinsic != nir_intrinsic_store_output) 2337ec681f3Smrg continue; 2347ec681f3Smrg 2357ec681f3Smrg const nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_out, nir_intrinsic_base(intr)); 2367ec681f3Smrg 2377ec681f3Smrg if (var->data.index) { 2387ec681f3Smrg if (!last_writeout) 2397ec681f3Smrg last_writeout = instr; 2407ec681f3Smrg continue; 2417ec681f3Smrg } 2427ec681f3Smrg 2437ec681f3Smrg if (!last_writeout) 2447ec681f3Smrg continue; 2457ec681f3Smrg 2467ec681f3Smrg /* This is a real store, so move it to after dual-source stores */ 2477ec681f3Smrg exec_node_remove(&instr->node); 2487ec681f3Smrg exec_node_insert_after(&last_writeout->node, &instr->node); 2497ec681f3Smrg 2507ec681f3Smrg progress = true; 2517ec681f3Smrg } 2527ec681f3Smrg } 2537ec681f3Smrg } 2547ec681f3Smrg 2557ec681f3Smrg return progress; 2567ec681f3Smrg} 257