17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2018 Alyssa Rosenzweig 37ec681f3Smrg * Copyright (C) 2019-2021 Collabora, Ltd. 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 217ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 227ec681f3Smrg * SOFTWARE. 237ec681f3Smrg */ 247ec681f3Smrg 257ec681f3Smrg#include "pan_blend.h" 267ec681f3Smrg 277ec681f3Smrg#ifdef PAN_ARCH 287ec681f3Smrg#include "pan_shader.h" 297ec681f3Smrg#endif 307ec681f3Smrg 317ec681f3Smrg#include "pan_texture.h" 327ec681f3Smrg#include "panfrost/util/pan_lower_framebuffer.h" 337ec681f3Smrg#include "util/format/u_format.h" 347ec681f3Smrg#include "compiler/nir/nir.h" 357ec681f3Smrg#include "compiler/nir/nir_builder.h" 367ec681f3Smrg#include "compiler/nir/nir_conversion_builder.h" 377ec681f3Smrg#include "compiler/nir/nir_lower_blend.h" 387ec681f3Smrg 397ec681f3Smrg#ifndef PAN_ARCH 407ec681f3Smrg 417ec681f3Smrg/* Fixed function blending */ 427ec681f3Smrg 437ec681f3Smrgstatic bool 447ec681f3Smrgfactor_is_supported(enum blend_factor factor) 457ec681f3Smrg{ 467ec681f3Smrg return factor != BLEND_FACTOR_SRC_ALPHA_SATURATE && 477ec681f3Smrg factor != BLEND_FACTOR_SRC1_COLOR && 487ec681f3Smrg factor != BLEND_FACTOR_SRC1_ALPHA; 497ec681f3Smrg} 507ec681f3Smrg 517ec681f3Smrg/* OpenGL allows encoding (src*dest + dest*src) which is incompatiblle with 527ec681f3Smrg * Midgard style blending since there are two multiplies. However, it may be 537ec681f3Smrg * factored as 2*src*dest = dest*(2*src), which can be encoded on Bifrost as 0 547ec681f3Smrg * + dest * (2*src) wih the new source_2 value of C. Detect this case. */ 557ec681f3Smrg 567ec681f3Smrgstatic bool 577ec681f3Smrgis_2srcdest(enum blend_func blend_func, 587ec681f3Smrg enum blend_factor src_factor, 597ec681f3Smrg bool invert_src, 607ec681f3Smrg enum blend_factor dest_factor, 617ec681f3Smrg bool invert_dest, 627ec681f3Smrg bool is_alpha) 637ec681f3Smrg{ 647ec681f3Smrg return (blend_func == BLEND_FUNC_ADD) && 657ec681f3Smrg ((src_factor == BLEND_FACTOR_DST_COLOR) || 667ec681f3Smrg ((src_factor == BLEND_FACTOR_DST_ALPHA) && is_alpha)) && 677ec681f3Smrg ((dest_factor == BLEND_FACTOR_SRC_COLOR) || 687ec681f3Smrg ((dest_factor == BLEND_FACTOR_SRC_ALPHA) && is_alpha)) && 697ec681f3Smrg !invert_src && !invert_dest; 707ec681f3Smrg} 717ec681f3Smrg 727ec681f3Smrgstatic bool 737ec681f3Smrgcan_fixed_function_equation(enum blend_func blend_func, 747ec681f3Smrg enum blend_factor src_factor, 757ec681f3Smrg bool invert_src, 767ec681f3Smrg enum blend_factor dest_factor, 777ec681f3Smrg bool invert_dest, 787ec681f3Smrg bool is_alpha, 797ec681f3Smrg bool supports_2src) 807ec681f3Smrg{ 817ec681f3Smrg if (is_2srcdest(blend_func, src_factor, invert_src, 827ec681f3Smrg dest_factor, invert_dest, is_alpha)) { 837ec681f3Smrg 847ec681f3Smrg return supports_2src; 857ec681f3Smrg } 867ec681f3Smrg 877ec681f3Smrg if (blend_func != BLEND_FUNC_ADD && 887ec681f3Smrg blend_func != BLEND_FUNC_SUBTRACT && 897ec681f3Smrg blend_func != BLEND_FUNC_REVERSE_SUBTRACT) 907ec681f3Smrg return false; 917ec681f3Smrg 927ec681f3Smrg if (!factor_is_supported(src_factor) || 937ec681f3Smrg !factor_is_supported(dest_factor)) 947ec681f3Smrg return false; 957ec681f3Smrg 967ec681f3Smrg if (src_factor != dest_factor && 977ec681f3Smrg src_factor != BLEND_FACTOR_ZERO && 987ec681f3Smrg dest_factor != BLEND_FACTOR_ZERO) 997ec681f3Smrg return false; 1007ec681f3Smrg 1017ec681f3Smrg return true; 1027ec681f3Smrg} 1037ec681f3Smrg 1047ec681f3Smrgstatic unsigned 1057ec681f3Smrgblend_factor_constant_mask(enum blend_factor factor) 1067ec681f3Smrg{ 1077ec681f3Smrg if (factor == BLEND_FACTOR_CONSTANT_COLOR) 1087ec681f3Smrg return 0b0111; /* RGB */ 1097ec681f3Smrg else if (factor == BLEND_FACTOR_CONSTANT_ALPHA) 1107ec681f3Smrg return 0b1000; /* A */ 1117ec681f3Smrg else 1127ec681f3Smrg return 0b0000; /* - */ 1137ec681f3Smrg} 1147ec681f3Smrg 1157ec681f3Smrgunsigned 1167ec681f3Smrgpan_blend_constant_mask(const struct pan_blend_equation eq) 1177ec681f3Smrg{ 1187ec681f3Smrg return blend_factor_constant_mask(eq.rgb_src_factor) | 1197ec681f3Smrg blend_factor_constant_mask(eq.rgb_dst_factor) | 1207ec681f3Smrg blend_factor_constant_mask(eq.alpha_src_factor) | 1217ec681f3Smrg blend_factor_constant_mask(eq.alpha_dst_factor); 1227ec681f3Smrg} 1237ec681f3Smrg 1247ec681f3Smrg/* Only "homogenous" (scalar or vector with all components equal) constants are 1257ec681f3Smrg * valid for fixed-function, so check for this condition */ 1267ec681f3Smrg 1277ec681f3Smrgbool 1287ec681f3Smrgpan_blend_is_homogenous_constant(unsigned mask, const float *constants) 1297ec681f3Smrg{ 1307ec681f3Smrg float constant = pan_blend_get_constant(mask, constants); 1317ec681f3Smrg 1327ec681f3Smrg u_foreach_bit(i, mask) { 1337ec681f3Smrg if (constants[i] != constant) 1347ec681f3Smrg return false; 1357ec681f3Smrg } 1367ec681f3Smrg 1377ec681f3Smrg return true; 1387ec681f3Smrg} 1397ec681f3Smrg 1407ec681f3Smrg/* Determines if an equation can run in fixed function */ 1417ec681f3Smrg 1427ec681f3Smrgbool 1437ec681f3Smrgpan_blend_can_fixed_function(const struct pan_blend_equation equation, 1447ec681f3Smrg bool supports_2src) 1457ec681f3Smrg{ 1467ec681f3Smrg return !equation.blend_enable || 1477ec681f3Smrg (can_fixed_function_equation(equation.rgb_func, 1487ec681f3Smrg equation.rgb_src_factor, 1497ec681f3Smrg equation.rgb_invert_src_factor, 1507ec681f3Smrg equation.rgb_dst_factor, 1517ec681f3Smrg equation.rgb_invert_dst_factor, 1527ec681f3Smrg false, supports_2src) && 1537ec681f3Smrg can_fixed_function_equation(equation.alpha_func, 1547ec681f3Smrg equation.alpha_src_factor, 1557ec681f3Smrg equation.alpha_invert_src_factor, 1567ec681f3Smrg equation.alpha_dst_factor, 1577ec681f3Smrg equation.alpha_invert_dst_factor, 1587ec681f3Smrg true, supports_2src)); 1597ec681f3Smrg} 1607ec681f3Smrg 1617ec681f3Smrgstatic enum mali_blend_operand_c 1627ec681f3Smrgto_c_factor(enum blend_factor factor) 1637ec681f3Smrg{ 1647ec681f3Smrg switch (factor) { 1657ec681f3Smrg case BLEND_FACTOR_ZERO: 1667ec681f3Smrg return MALI_BLEND_OPERAND_C_ZERO; 1677ec681f3Smrg 1687ec681f3Smrg case BLEND_FACTOR_SRC_ALPHA: 1697ec681f3Smrg return MALI_BLEND_OPERAND_C_SRC_ALPHA; 1707ec681f3Smrg 1717ec681f3Smrg case BLEND_FACTOR_DST_ALPHA: 1727ec681f3Smrg return MALI_BLEND_OPERAND_C_DEST_ALPHA; 1737ec681f3Smrg 1747ec681f3Smrg case BLEND_FACTOR_SRC_COLOR: 1757ec681f3Smrg return MALI_BLEND_OPERAND_C_SRC; 1767ec681f3Smrg 1777ec681f3Smrg case BLEND_FACTOR_DST_COLOR: 1787ec681f3Smrg return MALI_BLEND_OPERAND_C_DEST; 1797ec681f3Smrg 1807ec681f3Smrg case BLEND_FACTOR_CONSTANT_COLOR: 1817ec681f3Smrg case BLEND_FACTOR_CONSTANT_ALPHA: 1827ec681f3Smrg return MALI_BLEND_OPERAND_C_CONSTANT; 1837ec681f3Smrg 1847ec681f3Smrg default: 1857ec681f3Smrg unreachable("Unsupported blend factor"); 1867ec681f3Smrg } 1877ec681f3Smrg} 1887ec681f3Smrg 1897ec681f3Smrgstatic void 1907ec681f3Smrgto_panfrost_function(enum blend_func blend_func, 1917ec681f3Smrg enum blend_factor src_factor, 1927ec681f3Smrg bool invert_src, 1937ec681f3Smrg enum blend_factor dest_factor, 1947ec681f3Smrg bool invert_dest, 1957ec681f3Smrg bool is_alpha, 1967ec681f3Smrg struct MALI_BLEND_FUNCTION *function) 1977ec681f3Smrg{ 1987ec681f3Smrg assert(can_fixed_function_equation(blend_func, src_factor, invert_src, 1997ec681f3Smrg dest_factor, invert_dest, is_alpha, true)); 2007ec681f3Smrg 2017ec681f3Smrg if (src_factor == BLEND_FACTOR_ZERO && !invert_src) { 2027ec681f3Smrg function->a = MALI_BLEND_OPERAND_A_ZERO; 2037ec681f3Smrg function->b = MALI_BLEND_OPERAND_B_DEST; 2047ec681f3Smrg if (blend_func == BLEND_FUNC_SUBTRACT) 2057ec681f3Smrg function->negate_b = true; 2067ec681f3Smrg function->invert_c = invert_dest; 2077ec681f3Smrg function->c = to_c_factor(dest_factor); 2087ec681f3Smrg } else if (src_factor == BLEND_FACTOR_ZERO && invert_src) { 2097ec681f3Smrg function->a = MALI_BLEND_OPERAND_A_SRC; 2107ec681f3Smrg function->b = MALI_BLEND_OPERAND_B_DEST; 2117ec681f3Smrg if (blend_func == BLEND_FUNC_SUBTRACT) 2127ec681f3Smrg function->negate_b = true; 2137ec681f3Smrg else if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT) 2147ec681f3Smrg function->negate_a = true; 2157ec681f3Smrg function->invert_c = invert_dest; 2167ec681f3Smrg function->c = to_c_factor(dest_factor); 2177ec681f3Smrg } else if (dest_factor == BLEND_FACTOR_ZERO && !invert_dest) { 2187ec681f3Smrg function->a = MALI_BLEND_OPERAND_A_ZERO; 2197ec681f3Smrg function->b = MALI_BLEND_OPERAND_B_SRC; 2207ec681f3Smrg if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT) 2217ec681f3Smrg function->negate_b = true; 2227ec681f3Smrg function->invert_c = invert_src; 2237ec681f3Smrg function->c = to_c_factor(src_factor); 2247ec681f3Smrg } else if (dest_factor == BLEND_FACTOR_ZERO && invert_dest) { 2257ec681f3Smrg function->a = MALI_BLEND_OPERAND_A_DEST; 2267ec681f3Smrg function->b = MALI_BLEND_OPERAND_B_SRC; 2277ec681f3Smrg if (blend_func == BLEND_FUNC_SUBTRACT) 2287ec681f3Smrg function->negate_a = true; 2297ec681f3Smrg else if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT) 2307ec681f3Smrg function->negate_b = true; 2317ec681f3Smrg function->invert_c = invert_src; 2327ec681f3Smrg function->c = to_c_factor(src_factor); 2337ec681f3Smrg } else if (src_factor == dest_factor && invert_src == invert_dest) { 2347ec681f3Smrg function->a = MALI_BLEND_OPERAND_A_ZERO; 2357ec681f3Smrg function->invert_c = invert_src; 2367ec681f3Smrg function->c = to_c_factor(src_factor); 2377ec681f3Smrg 2387ec681f3Smrg switch (blend_func) { 2397ec681f3Smrg case BLEND_FUNC_ADD: 2407ec681f3Smrg function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST; 2417ec681f3Smrg break; 2427ec681f3Smrg case BLEND_FUNC_REVERSE_SUBTRACT: 2437ec681f3Smrg function->negate_b = true; 2447ec681f3Smrg FALLTHROUGH; 2457ec681f3Smrg case BLEND_FUNC_SUBTRACT: 2467ec681f3Smrg function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST; 2477ec681f3Smrg break; 2487ec681f3Smrg default: 2497ec681f3Smrg unreachable("Invalid blend function"); 2507ec681f3Smrg } 2517ec681f3Smrg } else if (is_2srcdest(blend_func, src_factor, invert_src, dest_factor, 2527ec681f3Smrg invert_dest, is_alpha)) { 2537ec681f3Smrg /* src*dest + dest*src = 2*src*dest = 0 + dest*(2*src) */ 2547ec681f3Smrg function->a = MALI_BLEND_OPERAND_A_ZERO; 2557ec681f3Smrg function->b = MALI_BLEND_OPERAND_B_DEST; 2567ec681f3Smrg function->c = MALI_BLEND_OPERAND_C_SRC_X_2; 2577ec681f3Smrg } else { 2587ec681f3Smrg assert(src_factor == dest_factor && invert_src != invert_dest); 2597ec681f3Smrg 2607ec681f3Smrg function->a = MALI_BLEND_OPERAND_A_DEST; 2617ec681f3Smrg function->invert_c = invert_src; 2627ec681f3Smrg function->c = to_c_factor(src_factor); 2637ec681f3Smrg 2647ec681f3Smrg switch (blend_func) { 2657ec681f3Smrg case BLEND_FUNC_ADD: 2667ec681f3Smrg function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST; 2677ec681f3Smrg break; 2687ec681f3Smrg case BLEND_FUNC_REVERSE_SUBTRACT: 2697ec681f3Smrg function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST; 2707ec681f3Smrg function->negate_b = true; 2717ec681f3Smrg break; 2727ec681f3Smrg case BLEND_FUNC_SUBTRACT: 2737ec681f3Smrg function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST; 2747ec681f3Smrg function->negate_a = true; 2757ec681f3Smrg break; 2767ec681f3Smrg default: 2777ec681f3Smrg unreachable("Invalid blend function\n"); 2787ec681f3Smrg } 2797ec681f3Smrg } 2807ec681f3Smrg} 2817ec681f3Smrg 2827ec681f3Smrgbool 2837ec681f3Smrgpan_blend_is_opaque(const struct pan_blend_equation equation) 2847ec681f3Smrg{ 2857ec681f3Smrg /* If a channel is masked out, we can't use opaque mode even if 2867ec681f3Smrg * blending is disabled, since we need a tilebuffer read in there */ 2877ec681f3Smrg if (equation.color_mask != 0xF) 2887ec681f3Smrg return false; 2897ec681f3Smrg 2907ec681f3Smrg /* With nothing masked out, disabled bledning is opaque */ 2917ec681f3Smrg if (!equation.blend_enable) 2927ec681f3Smrg return true; 2937ec681f3Smrg 2947ec681f3Smrg /* Also detect open-coded opaque blending */ 2957ec681f3Smrg return equation.rgb_src_factor == BLEND_FACTOR_ZERO && 2967ec681f3Smrg equation.rgb_invert_src_factor && 2977ec681f3Smrg equation.rgb_dst_factor == BLEND_FACTOR_ZERO && 2987ec681f3Smrg !equation.rgb_invert_dst_factor && 2997ec681f3Smrg (equation.rgb_func == BLEND_FUNC_ADD || 3007ec681f3Smrg equation.rgb_func == BLEND_FUNC_SUBTRACT) && 3017ec681f3Smrg equation.alpha_src_factor == BLEND_FACTOR_ZERO && 3027ec681f3Smrg equation.alpha_invert_src_factor && 3037ec681f3Smrg equation.alpha_dst_factor == BLEND_FACTOR_ZERO && 3047ec681f3Smrg !equation.alpha_invert_dst_factor && 3057ec681f3Smrg (equation.alpha_func == BLEND_FUNC_ADD || 3067ec681f3Smrg equation.alpha_func == BLEND_FUNC_SUBTRACT); 3077ec681f3Smrg} 3087ec681f3Smrg 3097ec681f3Smrgstatic bool 3107ec681f3Smrgis_dest_factor(enum blend_factor factor, bool alpha) 3117ec681f3Smrg{ 3127ec681f3Smrg return factor == BLEND_FACTOR_DST_ALPHA || 3137ec681f3Smrg factor == BLEND_FACTOR_DST_COLOR || 3147ec681f3Smrg (factor == BLEND_FACTOR_SRC_ALPHA_SATURATE && !alpha); 3157ec681f3Smrg} 3167ec681f3Smrg 3177ec681f3Smrg/* Determines if a blend equation reads back the destination. This can occur by 3187ec681f3Smrg * explicitly referencing the destination in the blend equation, or by using a 3197ec681f3Smrg * partial writemask. */ 3207ec681f3Smrg 3217ec681f3Smrgbool 3227ec681f3Smrgpan_blend_reads_dest(const struct pan_blend_equation equation) 3237ec681f3Smrg{ 3247ec681f3Smrg return (equation.color_mask && equation.color_mask != 0xF) || 3257ec681f3Smrg is_dest_factor(equation.rgb_src_factor, false) || 3267ec681f3Smrg is_dest_factor(equation.alpha_src_factor, true) || 3277ec681f3Smrg equation.rgb_dst_factor != BLEND_FACTOR_ZERO || 3287ec681f3Smrg equation.rgb_invert_dst_factor || 3297ec681f3Smrg equation.alpha_dst_factor != BLEND_FACTOR_ZERO || 3307ec681f3Smrg equation.alpha_invert_dst_factor; 3317ec681f3Smrg} 3327ec681f3Smrg 3337ec681f3Smrg/* Create the descriptor for a fixed blend mode given the corresponding API 3347ec681f3Smrg * state. Assumes the equation can be represented as fixed-function. */ 3357ec681f3Smrg 3367ec681f3Smrgvoid 3377ec681f3Smrgpan_blend_to_fixed_function_equation(const struct pan_blend_equation equation, 3387ec681f3Smrg struct MALI_BLEND_EQUATION *out) 3397ec681f3Smrg{ 3407ec681f3Smrg /* If no blending is enabled, default back on `replace` mode */ 3417ec681f3Smrg if (!equation.blend_enable) { 3427ec681f3Smrg out->color_mask = equation.color_mask; 3437ec681f3Smrg out->rgb.a = MALI_BLEND_OPERAND_A_SRC; 3447ec681f3Smrg out->rgb.b = MALI_BLEND_OPERAND_B_SRC; 3457ec681f3Smrg out->rgb.c = MALI_BLEND_OPERAND_C_ZERO; 3467ec681f3Smrg out->alpha.a = MALI_BLEND_OPERAND_A_SRC; 3477ec681f3Smrg out->alpha.b = MALI_BLEND_OPERAND_B_SRC; 3487ec681f3Smrg out->alpha.c = MALI_BLEND_OPERAND_C_ZERO; 3497ec681f3Smrg return; 3507ec681f3Smrg } 3517ec681f3Smrg 3527ec681f3Smrg /* Compile the fixed-function blend */ 3537ec681f3Smrg to_panfrost_function(equation.rgb_func, 3547ec681f3Smrg equation.rgb_src_factor, 3557ec681f3Smrg equation.rgb_invert_src_factor, 3567ec681f3Smrg equation.rgb_dst_factor, 3577ec681f3Smrg equation.rgb_invert_dst_factor, 3587ec681f3Smrg false, &out->rgb); 3597ec681f3Smrg 3607ec681f3Smrg to_panfrost_function(equation.alpha_func, 3617ec681f3Smrg equation.alpha_src_factor, 3627ec681f3Smrg equation.alpha_invert_src_factor, 3637ec681f3Smrg equation.alpha_dst_factor, 3647ec681f3Smrg equation.alpha_invert_dst_factor, 3657ec681f3Smrg true, &out->alpha); 3667ec681f3Smrg out->color_mask = equation.color_mask; 3677ec681f3Smrg} 3687ec681f3Smrg 3697ec681f3Smrguint32_t 3707ec681f3Smrgpan_pack_blend(const struct pan_blend_equation equation) 3717ec681f3Smrg{ 3727ec681f3Smrg STATIC_ASSERT(sizeof(uint32_t) == MALI_BLEND_EQUATION_LENGTH); 3737ec681f3Smrg 3747ec681f3Smrg uint32_t out = 0; 3757ec681f3Smrg 3767ec681f3Smrg pan_pack(&out, BLEND_EQUATION, cfg) { 3777ec681f3Smrg pan_blend_to_fixed_function_equation(equation, &cfg); 3787ec681f3Smrg } 3797ec681f3Smrg 3807ec681f3Smrg return out; 3817ec681f3Smrg} 3827ec681f3Smrg 3837ec681f3Smrgstatic uint32_t pan_blend_shader_key_hash(const void *key) 3847ec681f3Smrg{ 3857ec681f3Smrg return _mesa_hash_data(key, sizeof(struct pan_blend_shader_key)); 3867ec681f3Smrg} 3877ec681f3Smrg 3887ec681f3Smrgstatic bool pan_blend_shader_key_equal(const void *a, const void *b) 3897ec681f3Smrg{ 3907ec681f3Smrg return !memcmp(a, b, sizeof(struct pan_blend_shader_key)); 3917ec681f3Smrg} 3927ec681f3Smrg 3937ec681f3Smrgvoid 3947ec681f3Smrgpan_blend_shaders_init(struct panfrost_device *dev) 3957ec681f3Smrg{ 3967ec681f3Smrg dev->blend_shaders.shaders = 3977ec681f3Smrg _mesa_hash_table_create(NULL, pan_blend_shader_key_hash, 3987ec681f3Smrg pan_blend_shader_key_equal); 3997ec681f3Smrg pthread_mutex_init(&dev->blend_shaders.lock, NULL); 4007ec681f3Smrg} 4017ec681f3Smrg 4027ec681f3Smrgvoid 4037ec681f3Smrgpan_blend_shaders_cleanup(struct panfrost_device *dev) 4047ec681f3Smrg{ 4057ec681f3Smrg _mesa_hash_table_destroy(dev->blend_shaders.shaders, NULL); 4067ec681f3Smrg} 4077ec681f3Smrg 4087ec681f3Smrg#else /* ifndef PAN_ARCH */ 4097ec681f3Smrg 4107ec681f3Smrgstatic const char * 4117ec681f3Smrglogicop_str(enum pipe_logicop logicop) 4127ec681f3Smrg{ 4137ec681f3Smrg switch (logicop) { 4147ec681f3Smrg case PIPE_LOGICOP_CLEAR: return "clear"; 4157ec681f3Smrg case PIPE_LOGICOP_NOR: return "nor"; 4167ec681f3Smrg case PIPE_LOGICOP_AND_INVERTED: return "and-inverted"; 4177ec681f3Smrg case PIPE_LOGICOP_COPY_INVERTED: return "copy-inverted"; 4187ec681f3Smrg case PIPE_LOGICOP_AND_REVERSE: return "and-reverse"; 4197ec681f3Smrg case PIPE_LOGICOP_INVERT: return "invert"; 4207ec681f3Smrg case PIPE_LOGICOP_XOR: return "xor"; 4217ec681f3Smrg case PIPE_LOGICOP_NAND: return "nand"; 4227ec681f3Smrg case PIPE_LOGICOP_AND: return "and"; 4237ec681f3Smrg case PIPE_LOGICOP_EQUIV: return "equiv"; 4247ec681f3Smrg case PIPE_LOGICOP_NOOP: return "noop"; 4257ec681f3Smrg case PIPE_LOGICOP_OR_INVERTED: return "or-inverted"; 4267ec681f3Smrg case PIPE_LOGICOP_COPY: return "copy"; 4277ec681f3Smrg case PIPE_LOGICOP_OR_REVERSE: return "or-reverse"; 4287ec681f3Smrg case PIPE_LOGICOP_OR: return "or"; 4297ec681f3Smrg case PIPE_LOGICOP_SET: return "set"; 4307ec681f3Smrg default: unreachable("Invalid logicop\n"); 4317ec681f3Smrg } 4327ec681f3Smrg} 4337ec681f3Smrg 4347ec681f3Smrgstatic void 4357ec681f3Smrgget_equation_str(const struct pan_blend_rt_state *rt_state, 4367ec681f3Smrg char *str, unsigned len) 4377ec681f3Smrg{ 4387ec681f3Smrg const char *funcs[] = { 4397ec681f3Smrg "add", "sub", "reverse_sub", "min", "max", 4407ec681f3Smrg }; 4417ec681f3Smrg const char *factors[] = { 4427ec681f3Smrg "zero", "src_color", "src1_color", "dst_color", 4437ec681f3Smrg "src_alpha", "src1_alpha", "dst_alpha", 4447ec681f3Smrg "const_color", "const_alpha", "src_alpha_sat", 4457ec681f3Smrg }; 4467ec681f3Smrg int ret; 4477ec681f3Smrg 4487ec681f3Smrg if (!rt_state->equation.blend_enable) { 4497ec681f3Smrg ret = snprintf(str, len, "replace"); 4507ec681f3Smrg assert(ret > 0); 4517ec681f3Smrg return; 4527ec681f3Smrg } 4537ec681f3Smrg 4547ec681f3Smrg if (rt_state->equation.color_mask & 7) { 4557ec681f3Smrg assert(rt_state->equation.rgb_func < ARRAY_SIZE(funcs)); 4567ec681f3Smrg assert(rt_state->equation.rgb_src_factor < ARRAY_SIZE(factors)); 4577ec681f3Smrg assert(rt_state->equation.rgb_dst_factor < ARRAY_SIZE(factors)); 4587ec681f3Smrg ret = snprintf(str, len, "%s%s%s(func=%s,src_factor=%s%s,dst_factor=%s%s)%s", 4597ec681f3Smrg (rt_state->equation.color_mask & 1) ? "R" : "", 4607ec681f3Smrg (rt_state->equation.color_mask & 2) ? "G" : "", 4617ec681f3Smrg (rt_state->equation.color_mask & 4) ? "B" : "", 4627ec681f3Smrg funcs[rt_state->equation.rgb_func], 4637ec681f3Smrg rt_state->equation.rgb_invert_src_factor ? "-" : "", 4647ec681f3Smrg factors[rt_state->equation.rgb_src_factor], 4657ec681f3Smrg rt_state->equation.rgb_invert_dst_factor ? "-" : "", 4667ec681f3Smrg factors[rt_state->equation.rgb_dst_factor], 4677ec681f3Smrg rt_state->equation.color_mask & 8 ? ";" : ""); 4687ec681f3Smrg assert(ret > 0); 4697ec681f3Smrg str += ret; 4707ec681f3Smrg len -= ret; 4717ec681f3Smrg } 4727ec681f3Smrg 4737ec681f3Smrg if (rt_state->equation.color_mask & 8) { 4747ec681f3Smrg assert(rt_state->equation.alpha_func < ARRAY_SIZE(funcs)); 4757ec681f3Smrg assert(rt_state->equation.alpha_src_factor < ARRAY_SIZE(factors)); 4767ec681f3Smrg assert(rt_state->equation.alpha_dst_factor < ARRAY_SIZE(factors)); 4777ec681f3Smrg ret = snprintf(str, len, "A(func=%s,src_factor=%s%s,dst_factor=%s%s)", 4787ec681f3Smrg funcs[rt_state->equation.alpha_func], 4797ec681f3Smrg rt_state->equation.alpha_invert_src_factor ? "-" : "", 4807ec681f3Smrg factors[rt_state->equation.alpha_src_factor], 4817ec681f3Smrg rt_state->equation.alpha_invert_dst_factor ? "-" : "", 4827ec681f3Smrg factors[rt_state->equation.alpha_dst_factor]); 4837ec681f3Smrg assert(ret > 0); 4847ec681f3Smrg str += ret; 4857ec681f3Smrg len -= ret; 4867ec681f3Smrg } 4877ec681f3Smrg} 4887ec681f3Smrg 4897ec681f3Smrgstatic bool 4907ec681f3Smrgpan_inline_blend_constants(nir_builder *b, nir_instr *instr, void *data) 4917ec681f3Smrg{ 4927ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 4937ec681f3Smrg return false; 4947ec681f3Smrg 4957ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 4967ec681f3Smrg if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba) 4977ec681f3Smrg return false; 4987ec681f3Smrg 4997ec681f3Smrg float *floats = data; 5007ec681f3Smrg const nir_const_value constants[4] = { 5017ec681f3Smrg { .f32 = floats[0] }, 5027ec681f3Smrg { .f32 = floats[1] }, 5037ec681f3Smrg { .f32 = floats[2] }, 5047ec681f3Smrg { .f32 = floats[3] } 5057ec681f3Smrg }; 5067ec681f3Smrg 5077ec681f3Smrg b->cursor = nir_after_instr(instr); 5087ec681f3Smrg nir_ssa_def *constant = nir_build_imm(b, 4, 32, constants); 5097ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, constant); 5107ec681f3Smrg nir_instr_remove(instr); 5117ec681f3Smrg return true; 5127ec681f3Smrg} 5137ec681f3Smrg 5147ec681f3Smrgnir_shader * 5157ec681f3SmrgGENX(pan_blend_create_shader)(const struct panfrost_device *dev, 5167ec681f3Smrg const struct pan_blend_state *state, 5177ec681f3Smrg nir_alu_type src0_type, 5187ec681f3Smrg nir_alu_type src1_type, 5197ec681f3Smrg unsigned rt) 5207ec681f3Smrg{ 5217ec681f3Smrg const struct pan_blend_rt_state *rt_state = &state->rts[rt]; 5227ec681f3Smrg char equation_str[128] = { 0 }; 5237ec681f3Smrg 5247ec681f3Smrg get_equation_str(rt_state, equation_str, sizeof(equation_str)); 5257ec681f3Smrg 5267ec681f3Smrg nir_builder b = 5277ec681f3Smrg nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 5287ec681f3Smrg GENX(pan_shader_get_compiler_options)(), 5297ec681f3Smrg "pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", 5307ec681f3Smrg rt, util_format_name(rt_state->format), 5317ec681f3Smrg rt_state->nr_samples, 5327ec681f3Smrg state->logicop_enable ? "logicop" : "equation", 5337ec681f3Smrg state->logicop_enable ? 5347ec681f3Smrg logicop_str(state->logicop_func) : equation_str); 5357ec681f3Smrg 5367ec681f3Smrg const struct util_format_description *format_desc = 5377ec681f3Smrg util_format_description(rt_state->format); 5387ec681f3Smrg nir_alu_type nir_type = pan_unpacked_type_for_format(format_desc); 5397ec681f3Smrg enum glsl_base_type glsl_type = nir_get_glsl_base_type_for_nir_type(nir_type); 5407ec681f3Smrg 5417ec681f3Smrg nir_lower_blend_options options = { 5427ec681f3Smrg .logicop_enable = state->logicop_enable, 5437ec681f3Smrg .logicop_func = state->logicop_func, 5447ec681f3Smrg .rt[0].colormask = rt_state->equation.color_mask, 5457ec681f3Smrg .format[0] = rt_state->format 5467ec681f3Smrg }; 5477ec681f3Smrg 5487ec681f3Smrg if (!rt_state->equation.blend_enable) { 5497ec681f3Smrg static const nir_lower_blend_channel replace = { 5507ec681f3Smrg .func = BLEND_FUNC_ADD, 5517ec681f3Smrg .src_factor = BLEND_FACTOR_ZERO, 5527ec681f3Smrg .invert_src_factor = true, 5537ec681f3Smrg .dst_factor = BLEND_FACTOR_ZERO, 5547ec681f3Smrg .invert_dst_factor = false, 5557ec681f3Smrg }; 5567ec681f3Smrg 5577ec681f3Smrg options.rt[0].rgb = replace; 5587ec681f3Smrg options.rt[0].alpha = replace; 5597ec681f3Smrg } else { 5607ec681f3Smrg options.rt[0].rgb.func = rt_state->equation.rgb_func; 5617ec681f3Smrg options.rt[0].rgb.src_factor = rt_state->equation.rgb_src_factor; 5627ec681f3Smrg options.rt[0].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor; 5637ec681f3Smrg options.rt[0].rgb.dst_factor = rt_state->equation.rgb_dst_factor; 5647ec681f3Smrg options.rt[0].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor; 5657ec681f3Smrg options.rt[0].alpha.func = rt_state->equation.alpha_func; 5667ec681f3Smrg options.rt[0].alpha.src_factor = rt_state->equation.alpha_src_factor; 5677ec681f3Smrg options.rt[0].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor; 5687ec681f3Smrg options.rt[0].alpha.dst_factor = rt_state->equation.alpha_dst_factor; 5697ec681f3Smrg options.rt[0].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor; 5707ec681f3Smrg } 5717ec681f3Smrg 5727ec681f3Smrg nir_alu_type src_types[] = { src0_type ?: nir_type_float32, src1_type ?: nir_type_float32 }; 5737ec681f3Smrg 5747ec681f3Smrg /* HACK: workaround buggy TGSI shaders (u_blitter) */ 5757ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(src_types); ++i) { 5767ec681f3Smrg src_types[i] = nir_alu_type_get_base_type(nir_type) | 5777ec681f3Smrg nir_alu_type_get_type_size(src_types[i]); 5787ec681f3Smrg } 5797ec681f3Smrg 5807ec681f3Smrg nir_variable *c_src = 5817ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_in, 5827ec681f3Smrg glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[0]), 4), 5837ec681f3Smrg "gl_Color"); 5847ec681f3Smrg c_src->data.location = VARYING_SLOT_COL0; 5857ec681f3Smrg nir_variable *c_src1 = 5867ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_in, 5877ec681f3Smrg glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[1]), 4), 5887ec681f3Smrg "gl_Color1"); 5897ec681f3Smrg c_src1->data.location = VARYING_SLOT_VAR0; 5907ec681f3Smrg c_src1->data.driver_location = 1; 5917ec681f3Smrg nir_variable *c_out = 5927ec681f3Smrg nir_variable_create(b.shader, nir_var_shader_out, 5937ec681f3Smrg glsl_vector_type(glsl_type, 4), 5947ec681f3Smrg "gl_FragColor"); 5957ec681f3Smrg c_out->data.location = FRAG_RESULT_DATA0; 5967ec681f3Smrg 5977ec681f3Smrg nir_ssa_def *s_src[] = {nir_load_var(&b, c_src), nir_load_var(&b, c_src1)}; 5987ec681f3Smrg 5997ec681f3Smrg /* Saturate integer conversions */ 6007ec681f3Smrg for (int i = 0; i < ARRAY_SIZE(s_src); ++i) { 6017ec681f3Smrg nir_alu_type T = nir_alu_type_get_base_type(nir_type); 6027ec681f3Smrg s_src[i] = nir_convert_with_rounding(&b, s_src[i], 6037ec681f3Smrg src_types[i], nir_type, 6047ec681f3Smrg nir_rounding_mode_undef, 6057ec681f3Smrg T != nir_type_float); 6067ec681f3Smrg } 6077ec681f3Smrg 6087ec681f3Smrg /* Build a trivial blend shader */ 6097ec681f3Smrg nir_store_var(&b, c_out, s_src[0], 0xFF); 6107ec681f3Smrg 6117ec681f3Smrg options.src1 = s_src[1]; 6127ec681f3Smrg 6137ec681f3Smrg NIR_PASS_V(b.shader, nir_lower_blend, options); 6147ec681f3Smrg nir_shader_instructions_pass(b.shader, pan_inline_blend_constants, 6157ec681f3Smrg nir_metadata_block_index | nir_metadata_dominance, 6167ec681f3Smrg (void *) state->constants); 6177ec681f3Smrg 6187ec681f3Smrg return b.shader; 6197ec681f3Smrg} 6207ec681f3Smrg 6217ec681f3Smrg#if PAN_ARCH >= 6 6227ec681f3Smrguint64_t 6237ec681f3SmrgGENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev, 6247ec681f3Smrg enum pipe_format fmt, unsigned rt, 6257ec681f3Smrg unsigned force_size, bool dithered) 6267ec681f3Smrg{ 6277ec681f3Smrg const struct util_format_description *desc = util_format_description(fmt); 6287ec681f3Smrg uint64_t res; 6297ec681f3Smrg 6307ec681f3Smrg pan_pack(&res, INTERNAL_BLEND, cfg) { 6317ec681f3Smrg cfg.mode = MALI_BLEND_MODE_OPAQUE; 6327ec681f3Smrg cfg.fixed_function.num_comps = desc->nr_channels; 6337ec681f3Smrg cfg.fixed_function.rt = rt; 6347ec681f3Smrg 6357ec681f3Smrg nir_alu_type T = pan_unpacked_type_for_format(desc); 6367ec681f3Smrg 6377ec681f3Smrg if (force_size) 6387ec681f3Smrg T = nir_alu_type_get_base_type(T) | force_size; 6397ec681f3Smrg 6407ec681f3Smrg switch (T) { 6417ec681f3Smrg case nir_type_float16: 6427ec681f3Smrg cfg.fixed_function.conversion.register_format = 6437ec681f3Smrg MALI_REGISTER_FILE_FORMAT_F16; 6447ec681f3Smrg break; 6457ec681f3Smrg case nir_type_float32: 6467ec681f3Smrg cfg.fixed_function.conversion.register_format = 6477ec681f3Smrg MALI_REGISTER_FILE_FORMAT_F32; 6487ec681f3Smrg break; 6497ec681f3Smrg case nir_type_int8: 6507ec681f3Smrg case nir_type_int16: 6517ec681f3Smrg cfg.fixed_function.conversion.register_format = 6527ec681f3Smrg MALI_REGISTER_FILE_FORMAT_I16; 6537ec681f3Smrg break; 6547ec681f3Smrg case nir_type_int32: 6557ec681f3Smrg cfg.fixed_function.conversion.register_format = 6567ec681f3Smrg MALI_REGISTER_FILE_FORMAT_I32; 6577ec681f3Smrg break; 6587ec681f3Smrg case nir_type_uint8: 6597ec681f3Smrg case nir_type_uint16: 6607ec681f3Smrg cfg.fixed_function.conversion.register_format = 6617ec681f3Smrg MALI_REGISTER_FILE_FORMAT_U16; 6627ec681f3Smrg break; 6637ec681f3Smrg case nir_type_uint32: 6647ec681f3Smrg cfg.fixed_function.conversion.register_format = 6657ec681f3Smrg MALI_REGISTER_FILE_FORMAT_U32; 6667ec681f3Smrg break; 6677ec681f3Smrg default: 6687ec681f3Smrg unreachable("Invalid format"); 6697ec681f3Smrg } 6707ec681f3Smrg 6717ec681f3Smrg cfg.fixed_function.conversion.memory_format = 6727ec681f3Smrg panfrost_format_to_bifrost_blend(dev, fmt, dithered); 6737ec681f3Smrg } 6747ec681f3Smrg 6757ec681f3Smrg return res; 6767ec681f3Smrg} 6777ec681f3Smrg#endif 6787ec681f3Smrg 6797ec681f3Smrgstruct pan_blend_shader_variant * 6807ec681f3SmrgGENX(pan_blend_get_shader_locked)(const struct panfrost_device *dev, 6817ec681f3Smrg const struct pan_blend_state *state, 6827ec681f3Smrg nir_alu_type src0_type, 6837ec681f3Smrg nir_alu_type src1_type, 6847ec681f3Smrg unsigned rt) 6857ec681f3Smrg{ 6867ec681f3Smrg struct pan_blend_shader_key key = { 6877ec681f3Smrg .format = state->rts[rt].format, 6887ec681f3Smrg .src0_type = src0_type, 6897ec681f3Smrg .src1_type = src1_type, 6907ec681f3Smrg .rt = rt, 6917ec681f3Smrg .has_constants = pan_blend_constant_mask(state->rts[rt].equation) != 0, 6927ec681f3Smrg .logicop_enable = state->logicop_enable, 6937ec681f3Smrg .logicop_func = state->logicop_func, 6947ec681f3Smrg .nr_samples = state->rts[rt].nr_samples, 6957ec681f3Smrg .equation = state->rts[rt].equation, 6967ec681f3Smrg }; 6977ec681f3Smrg 6987ec681f3Smrg struct hash_entry *he = _mesa_hash_table_search(dev->blend_shaders.shaders, &key); 6997ec681f3Smrg struct pan_blend_shader *shader = he ? he->data : NULL; 7007ec681f3Smrg 7017ec681f3Smrg if (!shader) { 7027ec681f3Smrg shader = rzalloc(dev->blend_shaders.shaders, struct pan_blend_shader); 7037ec681f3Smrg shader->key = key; 7047ec681f3Smrg list_inithead(&shader->variants); 7057ec681f3Smrg _mesa_hash_table_insert(dev->blend_shaders.shaders, &shader->key, shader); 7067ec681f3Smrg } 7077ec681f3Smrg 7087ec681f3Smrg list_for_each_entry(struct pan_blend_shader_variant, iter, 7097ec681f3Smrg &shader->variants, node) { 7107ec681f3Smrg if (!key.has_constants || 7117ec681f3Smrg !memcmp(iter->constants, state->constants, sizeof(iter->constants))) { 7127ec681f3Smrg return iter; 7137ec681f3Smrg } 7147ec681f3Smrg } 7157ec681f3Smrg 7167ec681f3Smrg struct pan_blend_shader_variant *variant = NULL; 7177ec681f3Smrg 7187ec681f3Smrg if (shader->nvariants < PAN_BLEND_SHADER_MAX_VARIANTS) { 7197ec681f3Smrg variant = rzalloc(shader, struct pan_blend_shader_variant); 7207ec681f3Smrg memcpy(variant->constants, state->constants, sizeof(variant->constants)); 7217ec681f3Smrg util_dynarray_init(&variant->binary, variant); 7227ec681f3Smrg list_add(&variant->node, &shader->variants); 7237ec681f3Smrg shader->nvariants++; 7247ec681f3Smrg } else { 7257ec681f3Smrg variant = list_last_entry(&shader->variants, struct pan_blend_shader_variant, node); 7267ec681f3Smrg list_del(&variant->node); 7277ec681f3Smrg list_add(&variant->node, &shader->variants); 7287ec681f3Smrg util_dynarray_clear(&variant->binary); 7297ec681f3Smrg } 7307ec681f3Smrg 7317ec681f3Smrg nir_shader *nir = 7327ec681f3Smrg GENX(pan_blend_create_shader)(dev, state, src0_type, src1_type, rt); 7337ec681f3Smrg 7347ec681f3Smrg /* Compile the NIR shader */ 7357ec681f3Smrg struct panfrost_compile_inputs inputs = { 7367ec681f3Smrg .gpu_id = dev->gpu_id, 7377ec681f3Smrg .is_blend = true, 7387ec681f3Smrg .blend.rt = shader->key.rt, 7397ec681f3Smrg .blend.nr_samples = key.nr_samples, 7407ec681f3Smrg .rt_formats = { key.format }, 7417ec681f3Smrg }; 7427ec681f3Smrg 7437ec681f3Smrg#if PAN_ARCH >= 6 7447ec681f3Smrg inputs.blend.bifrost_blend_desc = 7457ec681f3Smrg GENX(pan_blend_get_internal_desc)(dev, key.format, key.rt, 0, false); 7467ec681f3Smrg#endif 7477ec681f3Smrg 7487ec681f3Smrg struct pan_shader_info info; 7497ec681f3Smrg 7507ec681f3Smrg GENX(pan_shader_compile)(nir, &inputs, &variant->binary, &info); 7517ec681f3Smrg 7527ec681f3Smrg variant->work_reg_count = info.work_reg_count; 7537ec681f3Smrg 7547ec681f3Smrg#if PAN_ARCH <= 5 7557ec681f3Smrg variant->first_tag = info.midgard.first_tag; 7567ec681f3Smrg#endif 7577ec681f3Smrg 7587ec681f3Smrg ralloc_free(nir); 7597ec681f3Smrg 7607ec681f3Smrg return variant; 7617ec681f3Smrg} 7627ec681f3Smrg#endif /* ifndef PAN_ARCH */ 763