/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors (Collabora):
 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

/**
 * Implements framebuffer format conversions in software for Midgard/Bifrost
 * blend shaders. This pass is designed for a single render target; Midgard
 * duplicates blend shaders for MRT to simplify everything.
 * A particular
 * framebuffer format may be categorized as 1) typed load available, 2) typed
 * unpack available, or 3) software unpack only, and likewise for stores. The
 * first two types are handled in the compiler backend directly, so this module
 * is responsible for identifying type 3 formats (hardware dependent) and
 * inserting appropriate ALU code to perform the conversion from the packed
 * type to a designated unpacked type, and vice versa.
 *
 * The unpacked type depends on the format:
 *
 *      - For 32-bit float formats or >8-bit UNORM, 32-bit floats.
 *      - For other floats, 16-bit floats.
 *      - For 32-bit ints, 32-bit ints.
 *      - For 8-bit ints, 8-bit ints.
 *      - For other ints, 16-bit ints.
 *
 * The rationale is to optimize blending and logic op instructions by using the
 * smallest precision necessary to store the pixel losslessly.
487ec681f3Smrg */ 497ec681f3Smrg 507ec681f3Smrg#include "compiler/nir/nir.h" 517ec681f3Smrg#include "compiler/nir/nir_builder.h" 527ec681f3Smrg#include "compiler/nir/nir_format_convert.h" 537ec681f3Smrg#include "util/format/u_format.h" 547ec681f3Smrg#include "pan_lower_framebuffer.h" 557ec681f3Smrg#include "panfrost-quirks.h" 567ec681f3Smrg 577ec681f3Smrg/* Determines the unpacked type best suiting a given format, so the rest of the 587ec681f3Smrg * pipeline may be adjusted accordingly */ 597ec681f3Smrg 607ec681f3Smrgnir_alu_type 617ec681f3Smrgpan_unpacked_type_for_format(const struct util_format_description *desc) 627ec681f3Smrg{ 637ec681f3Smrg int c = util_format_get_first_non_void_channel(desc->format); 647ec681f3Smrg 657ec681f3Smrg if (c == -1) 667ec681f3Smrg unreachable("Void format not renderable"); 677ec681f3Smrg 687ec681f3Smrg bool large = (desc->channel[c].size > 16); 697ec681f3Smrg bool large_norm = (desc->channel[c].size > 8); 707ec681f3Smrg bool bit8 = (desc->channel[c].size == 8); 717ec681f3Smrg assert(desc->channel[c].size <= 32); 727ec681f3Smrg 737ec681f3Smrg if (desc->channel[c].normalized) 747ec681f3Smrg return large_norm ? nir_type_float32 : nir_type_float16; 757ec681f3Smrg 767ec681f3Smrg switch (desc->channel[c].type) { 777ec681f3Smrg case UTIL_FORMAT_TYPE_UNSIGNED: 787ec681f3Smrg return bit8 ? nir_type_uint8 : 797ec681f3Smrg large ? nir_type_uint32 : nir_type_uint16; 807ec681f3Smrg case UTIL_FORMAT_TYPE_SIGNED: 817ec681f3Smrg return bit8 ? nir_type_int8 : 827ec681f3Smrg large ? nir_type_int32 : nir_type_int16; 837ec681f3Smrg case UTIL_FORMAT_TYPE_FLOAT: 847ec681f3Smrg return large ? 
nir_type_float32 : nir_type_float16; 857ec681f3Smrg default: 867ec681f3Smrg unreachable("Format not renderable"); 877ec681f3Smrg } 887ec681f3Smrg} 897ec681f3Smrg 907ec681f3Smrgstatic enum pan_format_class 917ec681f3Smrgpan_format_class_load(const struct util_format_description *desc, unsigned quirks) 927ec681f3Smrg{ 937ec681f3Smrg /* Pure integers can be loaded via EXT_framebuffer_fetch and should be 947ec681f3Smrg * handled as a raw load with a size conversion (it's cheap). Likewise, 957ec681f3Smrg * since float framebuffers are internally implemented as raw (i.e. 967ec681f3Smrg * integer) framebuffers with blend shaders to go back and forth, they 977ec681f3Smrg * should be s/w as well */ 987ec681f3Smrg 997ec681f3Smrg if (util_format_is_pure_integer(desc->format) || util_format_is_float(desc->format)) 1007ec681f3Smrg return PAN_FORMAT_SOFTWARE; 1017ec681f3Smrg 1027ec681f3Smrg /* Check if we can do anything better than software architecturally */ 1037ec681f3Smrg if (quirks & MIDGARD_NO_TYPED_BLEND_LOADS) { 1047ec681f3Smrg return (quirks & NO_BLEND_PACKS) 1057ec681f3Smrg ? 
PAN_FORMAT_SOFTWARE : PAN_FORMAT_PACK; 1067ec681f3Smrg } 1077ec681f3Smrg 1087ec681f3Smrg /* Some formats are missing as typed on some GPUs but have unpacks */ 1097ec681f3Smrg if (quirks & MIDGARD_MISSING_LOADS) { 1107ec681f3Smrg switch (desc->format) { 1117ec681f3Smrg case PIPE_FORMAT_R11G11B10_FLOAT: 1127ec681f3Smrg return PAN_FORMAT_PACK; 1137ec681f3Smrg default: 1147ec681f3Smrg return PAN_FORMAT_NATIVE; 1157ec681f3Smrg } 1167ec681f3Smrg } 1177ec681f3Smrg 1187ec681f3Smrg /* Otherwise, we can do native */ 1197ec681f3Smrg return PAN_FORMAT_NATIVE; 1207ec681f3Smrg} 1217ec681f3Smrg 1227ec681f3Smrgstatic enum pan_format_class 1237ec681f3Smrgpan_format_class_store(const struct util_format_description *desc, unsigned quirks) 1247ec681f3Smrg{ 1257ec681f3Smrg /* Check if we can do anything better than software architecturally */ 1267ec681f3Smrg if (quirks & MIDGARD_NO_TYPED_BLEND_STORES) { 1277ec681f3Smrg return (quirks & NO_BLEND_PACKS) 1287ec681f3Smrg ? PAN_FORMAT_SOFTWARE : PAN_FORMAT_PACK; 1297ec681f3Smrg } 1307ec681f3Smrg 1317ec681f3Smrg return PAN_FORMAT_NATIVE; 1327ec681f3Smrg} 1337ec681f3Smrg 1347ec681f3Smrg/* Convenience method */ 1357ec681f3Smrg 1367ec681f3Smrgstatic enum pan_format_class 1377ec681f3Smrgpan_format_class(const struct util_format_description *desc, unsigned quirks, bool is_store) 1387ec681f3Smrg{ 1397ec681f3Smrg if (is_store) 1407ec681f3Smrg return pan_format_class_store(desc, quirks); 1417ec681f3Smrg else 1427ec681f3Smrg return pan_format_class_load(desc, quirks); 1437ec681f3Smrg} 1447ec681f3Smrg 1457ec681f3Smrg/* Software packs/unpacks, by format class. Packs take in the pixel value typed 1467ec681f3Smrg * as `pan_unpacked_type_for_format` of the format and return an i32vec4 1477ec681f3Smrg * suitable for storing (with components replicated to fill). Unpacks do the 1487ec681f3Smrg * reverse but cannot rely on replication. 
*/ 1497ec681f3Smrg 1507ec681f3Smrgstatic nir_ssa_def * 1517ec681f3Smrgpan_replicate(nir_builder *b, nir_ssa_def *v, unsigned num_components) 1527ec681f3Smrg{ 1537ec681f3Smrg nir_ssa_def *replicated[4]; 1547ec681f3Smrg 1557ec681f3Smrg for (unsigned i = 0; i < 4; ++i) 1567ec681f3Smrg replicated[i] = nir_channel(b, v, i % num_components); 1577ec681f3Smrg 1587ec681f3Smrg return nir_vec(b, replicated, 4); 1597ec681f3Smrg} 1607ec681f3Smrg 1617ec681f3Smrgstatic nir_ssa_def * 1627ec681f3Smrgpan_unpack_pure_32(nir_builder *b, nir_ssa_def *pack, unsigned num_components) 1637ec681f3Smrg{ 1647ec681f3Smrg return nir_channels(b, pack, (1 << num_components) - 1); 1657ec681f3Smrg} 1667ec681f3Smrg 1677ec681f3Smrg/* Pure x16 formats are x16 unpacked, so it's similar, but we need to pack 1687ec681f3Smrg * upper/lower halves of course */ 1697ec681f3Smrg 1707ec681f3Smrgstatic nir_ssa_def * 1717ec681f3Smrgpan_pack_pure_16(nir_builder *b, nir_ssa_def *v, unsigned num_components) 1727ec681f3Smrg{ 1737ec681f3Smrg nir_ssa_def *v4 = pan_replicate(b, v, num_components); 1747ec681f3Smrg 1757ec681f3Smrg nir_ssa_def *lo = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 0)); 1767ec681f3Smrg nir_ssa_def *hi = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 2)); 1777ec681f3Smrg 1787ec681f3Smrg return nir_vec4(b, lo, hi, lo, hi); 1797ec681f3Smrg} 1807ec681f3Smrg 1817ec681f3Smrgstatic nir_ssa_def * 1827ec681f3Smrgpan_unpack_pure_16(nir_builder *b, nir_ssa_def *pack, unsigned num_components) 1837ec681f3Smrg{ 1847ec681f3Smrg nir_ssa_def *unpacked[4]; 1857ec681f3Smrg 1867ec681f3Smrg assert(num_components <= 4); 1877ec681f3Smrg 1887ec681f3Smrg for (unsigned i = 0; i < num_components; i += 2) { 1897ec681f3Smrg nir_ssa_def *halves = 1907ec681f3Smrg nir_unpack_32_2x16(b, nir_channel(b, pack, i >> 1)); 1917ec681f3Smrg 1927ec681f3Smrg unpacked[i + 0] = nir_channel(b, halves, 0); 1937ec681f3Smrg unpacked[i + 1] = nir_channel(b, halves, 1); 1947ec681f3Smrg } 1957ec681f3Smrg 1967ec681f3Smrg return nir_pad_vec4(b, 
nir_vec(b, unpacked, num_components)); 1977ec681f3Smrg} 1987ec681f3Smrg 1997ec681f3Smrgstatic nir_ssa_def * 2007ec681f3Smrgpan_pack_reorder(nir_builder *b, 2017ec681f3Smrg const struct util_format_description *desc, 2027ec681f3Smrg nir_ssa_def *v) 2037ec681f3Smrg{ 2047ec681f3Smrg unsigned swizzle[4] = { 0, 1, 2, 3 }; 2057ec681f3Smrg 2067ec681f3Smrg for (unsigned i = 0; i < v->num_components; i++) { 2077ec681f3Smrg if (desc->swizzle[i] <= PIPE_SWIZZLE_W) 2087ec681f3Smrg swizzle[i] = desc->swizzle[i]; 2097ec681f3Smrg } 2107ec681f3Smrg 2117ec681f3Smrg return nir_swizzle(b, v, swizzle, v->num_components); 2127ec681f3Smrg} 2137ec681f3Smrg 2147ec681f3Smrgstatic nir_ssa_def * 2157ec681f3Smrgpan_unpack_reorder(nir_builder *b, 2167ec681f3Smrg const struct util_format_description *desc, 2177ec681f3Smrg nir_ssa_def *v) 2187ec681f3Smrg{ 2197ec681f3Smrg unsigned swizzle[4] = { 0, 1, 2, 3 }; 2207ec681f3Smrg 2217ec681f3Smrg for (unsigned i = 0; i < v->num_components; i++) { 2227ec681f3Smrg if (desc->swizzle[i] <= PIPE_SWIZZLE_W) 2237ec681f3Smrg swizzle[desc->swizzle[i]] = i; 2247ec681f3Smrg } 2257ec681f3Smrg 2267ec681f3Smrg return nir_swizzle(b, v, swizzle, v->num_components); 2277ec681f3Smrg} 2287ec681f3Smrg 2297ec681f3Smrgstatic nir_ssa_def * 2307ec681f3Smrgpan_replicate_4(nir_builder *b, nir_ssa_def *v) 2317ec681f3Smrg{ 2327ec681f3Smrg return nir_vec4(b, v, v, v, v); 2337ec681f3Smrg} 2347ec681f3Smrg 2357ec681f3Smrgstatic nir_ssa_def * 2367ec681f3Smrgpan_pack_pure_8(nir_builder *b, nir_ssa_def *v, unsigned num_components) 2377ec681f3Smrg{ 2387ec681f3Smrg return pan_replicate_4(b, nir_pack_32_4x8(b, pan_replicate(b, v, num_components))); 2397ec681f3Smrg} 2407ec681f3Smrg 2417ec681f3Smrgstatic nir_ssa_def * 2427ec681f3Smrgpan_unpack_pure_8(nir_builder *b, nir_ssa_def *pack, unsigned num_components) 2437ec681f3Smrg{ 2447ec681f3Smrg nir_ssa_def *unpacked = nir_unpack_32_4x8(b, nir_channel(b, pack, 0)); 2457ec681f3Smrg return nir_channels(b, unpacked, (1 << num_components) - 1); 
2467ec681f3Smrg} 2477ec681f3Smrg 2487ec681f3Smrg/* For <= 8-bits per channel, [U,S]NORM formats are packed like [U,S]NORM 8, 2497ec681f3Smrg * with zeroes spacing out each component as needed */ 2507ec681f3Smrg 2517ec681f3Smrgstatic nir_ssa_def * 2527ec681f3Smrgpan_pack_norm(nir_builder *b, nir_ssa_def *v, 2537ec681f3Smrg unsigned x, unsigned y, unsigned z, unsigned w, 2547ec681f3Smrg bool is_signed) 2557ec681f3Smrg{ 2567ec681f3Smrg /* If a channel has N bits, 1.0 is encoded as 2^N - 1 for UNORMs and 2577ec681f3Smrg * 2^(N-1) - 1 for SNORMs */ 2587ec681f3Smrg nir_ssa_def *scales = 2597ec681f3Smrg is_signed ? 2607ec681f3Smrg nir_imm_vec4_16(b, 2617ec681f3Smrg (1 << (x - 1)) - 1, (1 << (y - 1)) - 1, 2627ec681f3Smrg (1 << (z - 1)) - 1, (1 << (w - 1)) - 1) : 2637ec681f3Smrg nir_imm_vec4_16(b, 2647ec681f3Smrg (1 << x) - 1, (1 << y) - 1, 2657ec681f3Smrg (1 << z) - 1, (1 << w) - 1); 2667ec681f3Smrg 2677ec681f3Smrg /* If a channel has N bits, we pad out to the byte by (8 - N) bits */ 2687ec681f3Smrg nir_ssa_def *shifts = nir_imm_ivec4(b, 8 - x, 8 - y, 8 - z, 8 - w); 2697ec681f3Smrg 2707ec681f3Smrg nir_ssa_def *clamped = 2717ec681f3Smrg is_signed ? 
2727ec681f3Smrg nir_fsat_signed_mali(b, nir_pad_vec4(b, v)) : 2737ec681f3Smrg nir_fsat(b, nir_pad_vec4(b, v)); 2747ec681f3Smrg 2757ec681f3Smrg nir_ssa_def *f = nir_fmul(b, clamped, scales); 2767ec681f3Smrg nir_ssa_def *u8 = nir_f2u8(b, nir_fround_even(b, f)); 2777ec681f3Smrg nir_ssa_def *s = nir_ishl(b, u8, shifts); 2787ec681f3Smrg nir_ssa_def *repl = nir_pack_32_4x8(b, s); 2797ec681f3Smrg 2807ec681f3Smrg return pan_replicate_4(b, repl); 2817ec681f3Smrg} 2827ec681f3Smrg 2837ec681f3Smrgstatic nir_ssa_def * 2847ec681f3Smrgpan_pack_unorm(nir_builder *b, nir_ssa_def *v, 2857ec681f3Smrg unsigned x, unsigned y, unsigned z, unsigned w) 2867ec681f3Smrg{ 2877ec681f3Smrg return pan_pack_norm(b, v, x, y, z, w, false); 2887ec681f3Smrg} 2897ec681f3Smrg 2907ec681f3Smrgstatic nir_ssa_def * 2917ec681f3Smrgpan_pack_snorm(nir_builder *b, nir_ssa_def *v, 2927ec681f3Smrg unsigned x, unsigned y, unsigned z, unsigned w) 2937ec681f3Smrg{ 2947ec681f3Smrg return pan_pack_norm(b, v, x, y, z, w, true); 2957ec681f3Smrg} 2967ec681f3Smrg 2977ec681f3Smrg/* RGB10_A2 is packed in the tilebuffer as the bottom 3 bytes being the top 2987ec681f3Smrg * 8-bits of RGB and the top byte being RGBA as 2-bits packed. 
As imirkin 2997ec681f3Smrg * pointed out, this means free conversion to RGBX8 */ 3007ec681f3Smrg 3017ec681f3Smrgstatic nir_ssa_def * 3027ec681f3Smrgpan_pack_unorm_1010102(nir_builder *b, nir_ssa_def *v) 3037ec681f3Smrg{ 3047ec681f3Smrg nir_ssa_def *scale = nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0); 3057ec681f3Smrg nir_ssa_def *s = nir_f2u32(b, nir_fround_even(b, nir_fmul(b, nir_fsat(b, v), scale))); 3067ec681f3Smrg 3077ec681f3Smrg nir_ssa_def *top8 = nir_ushr(b, s, nir_imm_ivec4(b, 0x2, 0x2, 0x2, 0x2)); 3087ec681f3Smrg nir_ssa_def *top8_rgb = nir_pack_32_4x8(b, nir_u2u8(b, top8)); 3097ec681f3Smrg 3107ec681f3Smrg nir_ssa_def *bottom2 = nir_iand(b, s, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3)); 3117ec681f3Smrg 3127ec681f3Smrg nir_ssa_def *top = 3137ec681f3Smrg nir_ior(b, 3147ec681f3Smrg nir_ior(b, 3157ec681f3Smrg nir_ishl(b, nir_channel(b, bottom2, 0), nir_imm_int(b, 24 + 0)), 3167ec681f3Smrg nir_ishl(b, nir_channel(b, bottom2, 1), nir_imm_int(b, 24 + 2))), 3177ec681f3Smrg nir_ior(b, 3187ec681f3Smrg nir_ishl(b, nir_channel(b, bottom2, 2), nir_imm_int(b, 24 + 4)), 3197ec681f3Smrg nir_ishl(b, nir_channel(b, bottom2, 3), nir_imm_int(b, 24 + 6)))); 3207ec681f3Smrg 3217ec681f3Smrg nir_ssa_def *p = nir_ior(b, top, top8_rgb); 3227ec681f3Smrg return pan_replicate_4(b, p); 3237ec681f3Smrg} 3247ec681f3Smrg 3257ec681f3Smrg/* On the other hand, the pure int RGB10_A2 is identical to the spec */ 3267ec681f3Smrg 3277ec681f3Smrgstatic nir_ssa_def * 3287ec681f3Smrgpan_pack_int_1010102(nir_builder *b, nir_ssa_def *v, bool is_signed) 3297ec681f3Smrg{ 3307ec681f3Smrg v = nir_u2u32(b, v); 3317ec681f3Smrg 3327ec681f3Smrg /* Clamp the values */ 3337ec681f3Smrg if (is_signed) { 3347ec681f3Smrg v = nir_imin(b, v, nir_imm_ivec4(b, 511, 511, 511, 1)); 3357ec681f3Smrg v = nir_imax(b, v, nir_imm_ivec4(b, -512, -512, -512, -2)); 3367ec681f3Smrg } else { 3377ec681f3Smrg v = nir_umin(b, v, nir_imm_ivec4(b, 1023, 1023, 1023, 3)); 3387ec681f3Smrg } 3397ec681f3Smrg 3407ec681f3Smrg v = nir_ishl(b, v, 
nir_imm_ivec4(b, 0, 10, 20, 30)); 3417ec681f3Smrg v = nir_ior(b, 3427ec681f3Smrg nir_ior(b, nir_channel(b, v, 0), nir_channel(b, v, 1)), 3437ec681f3Smrg nir_ior(b, nir_channel(b, v, 2), nir_channel(b, v, 3))); 3447ec681f3Smrg 3457ec681f3Smrg return pan_replicate_4(b, v); 3467ec681f3Smrg} 3477ec681f3Smrg 3487ec681f3Smrgstatic nir_ssa_def * 3497ec681f3Smrgpan_unpack_int_1010102(nir_builder *b, nir_ssa_def *packed, bool is_signed) 3507ec681f3Smrg{ 3517ec681f3Smrg nir_ssa_def *v = pan_replicate_4(b, nir_channel(b, packed, 0)); 3527ec681f3Smrg 3537ec681f3Smrg /* Left shift all components so the sign bit is on the MSB, and 3547ec681f3Smrg * can be extended by ishr(). The ishl()+[u,i]shr() combination 3557ec681f3Smrg * sets all unused bits to 0 without requiring a mask. 3567ec681f3Smrg */ 3577ec681f3Smrg v = nir_ishl(b, v, nir_imm_ivec4(b, 22, 12, 2, 0)); 3587ec681f3Smrg 3597ec681f3Smrg if (is_signed) 3607ec681f3Smrg v = nir_ishr(b, v, nir_imm_ivec4(b, 22, 22, 22, 30)); 3617ec681f3Smrg else 3627ec681f3Smrg v = nir_ushr(b, v, nir_imm_ivec4(b, 22, 22, 22, 30)); 3637ec681f3Smrg 3647ec681f3Smrg return nir_i2i16(b, v); 3657ec681f3Smrg} 3667ec681f3Smrg 3677ec681f3Smrg/* NIR means we can *finally* catch a break */ 3687ec681f3Smrg 3697ec681f3Smrgstatic nir_ssa_def * 3707ec681f3Smrgpan_pack_r11g11b10(nir_builder *b, nir_ssa_def *v) 3717ec681f3Smrg{ 3727ec681f3Smrg return pan_replicate_4(b, nir_format_pack_11f11f10f(b, 3737ec681f3Smrg nir_f2f32(b, v))); 3747ec681f3Smrg} 3757ec681f3Smrg 3767ec681f3Smrgstatic nir_ssa_def * 3777ec681f3Smrgpan_unpack_r11g11b10(nir_builder *b, nir_ssa_def *v) 3787ec681f3Smrg{ 3797ec681f3Smrg nir_ssa_def *f32 = nir_format_unpack_11f11f10f(b, nir_channel(b, v, 0)); 3807ec681f3Smrg nir_ssa_def *f16 = nir_f2fmp(b, f32); 3817ec681f3Smrg 3827ec681f3Smrg /* Extend to vec4 with alpha */ 3837ec681f3Smrg nir_ssa_def *components[4] = { 3847ec681f3Smrg nir_channel(b, f16, 0), 3857ec681f3Smrg nir_channel(b, f16, 1), 3867ec681f3Smrg nir_channel(b, f16, 2), 
3877ec681f3Smrg nir_imm_float16(b, 1.0) 3887ec681f3Smrg }; 3897ec681f3Smrg 3907ec681f3Smrg return nir_vec(b, components, 4); 3917ec681f3Smrg} 3927ec681f3Smrg 3937ec681f3Smrg/* Wrapper around sRGB conversion */ 3947ec681f3Smrg 3957ec681f3Smrgstatic nir_ssa_def * 3967ec681f3Smrgpan_linear_to_srgb(nir_builder *b, nir_ssa_def *linear) 3977ec681f3Smrg{ 3987ec681f3Smrg nir_ssa_def *rgb = nir_channels(b, linear, 0x7); 3997ec681f3Smrg 4007ec681f3Smrg /* TODO: fp16 native conversion */ 4017ec681f3Smrg nir_ssa_def *srgb = nir_f2fmp(b, 4027ec681f3Smrg nir_format_linear_to_srgb(b, nir_f2f32(b, rgb))); 4037ec681f3Smrg 4047ec681f3Smrg nir_ssa_def *comp[4] = { 4057ec681f3Smrg nir_channel(b, srgb, 0), 4067ec681f3Smrg nir_channel(b, srgb, 1), 4077ec681f3Smrg nir_channel(b, srgb, 2), 4087ec681f3Smrg nir_channel(b, linear, 3), 4097ec681f3Smrg }; 4107ec681f3Smrg 4117ec681f3Smrg return nir_vec(b, comp, 4); 4127ec681f3Smrg} 4137ec681f3Smrg 4147ec681f3Smrg/* Generic dispatches for un/pack regardless of format */ 4157ec681f3Smrg 4167ec681f3Smrgstatic nir_ssa_def * 4177ec681f3Smrgpan_unpack(nir_builder *b, 4187ec681f3Smrg const struct util_format_description *desc, 4197ec681f3Smrg nir_ssa_def *packed) 4207ec681f3Smrg{ 4217ec681f3Smrg if (desc->is_array) { 4227ec681f3Smrg int c = util_format_get_first_non_void_channel(desc->format); 4237ec681f3Smrg assert(c >= 0); 4247ec681f3Smrg struct util_format_channel_description d = desc->channel[c]; 4257ec681f3Smrg 4267ec681f3Smrg if (d.size == 32 || d.size == 16) { 4277ec681f3Smrg assert(!d.normalized); 4287ec681f3Smrg assert(d.type == UTIL_FORMAT_TYPE_FLOAT || d.pure_integer); 4297ec681f3Smrg 4307ec681f3Smrg return d.size == 32 ? 
pan_unpack_pure_32(b, packed, desc->nr_channels) : 4317ec681f3Smrg pan_unpack_pure_16(b, packed, desc->nr_channels); 4327ec681f3Smrg } else if (d.size == 8) { 4337ec681f3Smrg assert(d.pure_integer); 4347ec681f3Smrg return pan_unpack_pure_8(b, packed, desc->nr_channels); 4357ec681f3Smrg } else { 4367ec681f3Smrg unreachable("Unrenderable size"); 4377ec681f3Smrg } 4387ec681f3Smrg } 4397ec681f3Smrg 4407ec681f3Smrg switch (desc->format) { 4417ec681f3Smrg case PIPE_FORMAT_R10G10B10A2_UINT: 4427ec681f3Smrg case PIPE_FORMAT_B10G10R10A2_UINT: 4437ec681f3Smrg return pan_unpack_int_1010102(b, packed, false); 4447ec681f3Smrg case PIPE_FORMAT_R10G10B10A2_SINT: 4457ec681f3Smrg case PIPE_FORMAT_B10G10R10A2_SINT: 4467ec681f3Smrg return pan_unpack_int_1010102(b, packed, true); 4477ec681f3Smrg case PIPE_FORMAT_R11G11B10_FLOAT: 4487ec681f3Smrg return pan_unpack_r11g11b10(b, packed); 4497ec681f3Smrg default: 4507ec681f3Smrg break; 4517ec681f3Smrg } 4527ec681f3Smrg 4537ec681f3Smrg fprintf(stderr, "%s\n", desc->name); 4547ec681f3Smrg unreachable("Unknown format"); 4557ec681f3Smrg} 4567ec681f3Smrg 4577ec681f3Smrgstatic nir_ssa_def * 4587ec681f3Smrgpan_pack(nir_builder *b, 4597ec681f3Smrg const struct util_format_description *desc, 4607ec681f3Smrg nir_ssa_def *unpacked) 4617ec681f3Smrg{ 4627ec681f3Smrg if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 4637ec681f3Smrg unpacked = pan_linear_to_srgb(b, unpacked); 4647ec681f3Smrg 4657ec681f3Smrg if (util_format_is_unorm8(desc)) 4667ec681f3Smrg return pan_pack_unorm(b, unpacked, 8, 8, 8, 8); 4677ec681f3Smrg 4687ec681f3Smrg if (util_format_is_snorm8(desc->format)) 4697ec681f3Smrg return pan_pack_snorm(b, unpacked, 8, 8, 8, 8); 4707ec681f3Smrg 4717ec681f3Smrg if (desc->is_array) { 4727ec681f3Smrg int c = util_format_get_first_non_void_channel(desc->format); 4737ec681f3Smrg assert(c >= 0); 4747ec681f3Smrg struct util_format_channel_description d = desc->channel[c]; 4757ec681f3Smrg 4767ec681f3Smrg if (d.size == 32 || d.size == 16) { 
4777ec681f3Smrg assert(!d.normalized); 4787ec681f3Smrg assert(d.type == UTIL_FORMAT_TYPE_FLOAT || d.pure_integer); 4797ec681f3Smrg 4807ec681f3Smrg return d.size == 32 ? 4817ec681f3Smrg pan_replicate(b, unpacked, desc->nr_channels) : 4827ec681f3Smrg pan_pack_pure_16(b, unpacked, desc->nr_channels); 4837ec681f3Smrg } else if (d.size == 8) { 4847ec681f3Smrg assert(d.pure_integer); 4857ec681f3Smrg return pan_pack_pure_8(b, unpacked, desc->nr_channels); 4867ec681f3Smrg } else { 4877ec681f3Smrg unreachable("Unrenderable size"); 4887ec681f3Smrg } 4897ec681f3Smrg } 4907ec681f3Smrg 4917ec681f3Smrg switch (desc->format) { 4927ec681f3Smrg case PIPE_FORMAT_B4G4R4A4_UNORM: 4937ec681f3Smrg case PIPE_FORMAT_B4G4R4X4_UNORM: 4947ec681f3Smrg case PIPE_FORMAT_A4R4_UNORM: 4957ec681f3Smrg case PIPE_FORMAT_R4A4_UNORM: 4967ec681f3Smrg case PIPE_FORMAT_A4B4G4R4_UNORM: 4977ec681f3Smrg case PIPE_FORMAT_R4G4B4A4_UNORM: 4987ec681f3Smrg return pan_pack_unorm(b, unpacked, 4, 4, 4, 4); 4997ec681f3Smrg case PIPE_FORMAT_B5G5R5A1_UNORM: 5007ec681f3Smrg case PIPE_FORMAT_R5G5B5A1_UNORM: 5017ec681f3Smrg return pan_pack_unorm(b, unpacked, 5, 6, 5, 1); 5027ec681f3Smrg case PIPE_FORMAT_R5G6B5_UNORM: 5037ec681f3Smrg case PIPE_FORMAT_B5G6R5_UNORM: 5047ec681f3Smrg return pan_pack_unorm(b, unpacked, 5, 6, 5, 0); 5057ec681f3Smrg case PIPE_FORMAT_R10G10B10A2_UNORM: 5067ec681f3Smrg case PIPE_FORMAT_B10G10R10A2_UNORM: 5077ec681f3Smrg return pan_pack_unorm_1010102(b, unpacked); 5087ec681f3Smrg case PIPE_FORMAT_R10G10B10A2_UINT: 5097ec681f3Smrg case PIPE_FORMAT_B10G10R10A2_UINT: 5107ec681f3Smrg return pan_pack_int_1010102(b, unpacked, false); 5117ec681f3Smrg case PIPE_FORMAT_R10G10B10A2_SINT: 5127ec681f3Smrg case PIPE_FORMAT_B10G10R10A2_SINT: 5137ec681f3Smrg return pan_pack_int_1010102(b, unpacked, true); 5147ec681f3Smrg case PIPE_FORMAT_R11G11B10_FLOAT: 5157ec681f3Smrg return pan_pack_r11g11b10(b, unpacked); 5167ec681f3Smrg default: 5177ec681f3Smrg break; 5187ec681f3Smrg } 5197ec681f3Smrg 5207ec681f3Smrg 
fprintf(stderr, "%s\n", desc->name); 5217ec681f3Smrg unreachable("Unknown format"); 5227ec681f3Smrg} 5237ec681f3Smrg 5247ec681f3Smrgstatic void 5257ec681f3Smrgpan_lower_fb_store(nir_shader *shader, 5267ec681f3Smrg nir_builder *b, 5277ec681f3Smrg nir_intrinsic_instr *intr, 5287ec681f3Smrg const struct util_format_description *desc, 5297ec681f3Smrg bool reorder_comps, 5307ec681f3Smrg unsigned quirks) 5317ec681f3Smrg{ 5327ec681f3Smrg /* For stores, add conversion before */ 5337ec681f3Smrg nir_ssa_def *unpacked = nir_ssa_for_src(b, intr->src[1], 4); 5347ec681f3Smrg 5357ec681f3Smrg /* Re-order the components */ 5367ec681f3Smrg if (reorder_comps) 5377ec681f3Smrg unpacked = pan_pack_reorder(b, desc, unpacked); 5387ec681f3Smrg 5397ec681f3Smrg nir_ssa_def *packed = pan_pack(b, desc, unpacked); 5407ec681f3Smrg 5417ec681f3Smrg nir_store_raw_output_pan(b, packed); 5427ec681f3Smrg} 5437ec681f3Smrg 5447ec681f3Smrgstatic nir_ssa_def * 5457ec681f3Smrgpan_sample_id(nir_builder *b, int sample) 5467ec681f3Smrg{ 5477ec681f3Smrg return (sample >= 0) ? nir_imm_int(b, sample) : nir_load_sample_id(b); 5487ec681f3Smrg} 5497ec681f3Smrg 5507ec681f3Smrgstatic void 5517ec681f3Smrgpan_lower_fb_load(nir_shader *shader, 5527ec681f3Smrg nir_builder *b, 5537ec681f3Smrg nir_intrinsic_instr *intr, 5547ec681f3Smrg const struct util_format_description *desc, 5557ec681f3Smrg bool reorder_comps, 5567ec681f3Smrg unsigned base, int sample, unsigned quirks) 5577ec681f3Smrg{ 5587ec681f3Smrg nir_ssa_def *packed = 5597ec681f3Smrg nir_load_raw_output_pan(b, 4, 32, pan_sample_id(b, sample), 5607ec681f3Smrg .base = base); 5617ec681f3Smrg 5627ec681f3Smrg /* Convert the raw value */ 5637ec681f3Smrg nir_ssa_def *unpacked = pan_unpack(b, desc, packed); 5647ec681f3Smrg 5657ec681f3Smrg /* Convert to the size of the load intrinsic. 
5667ec681f3Smrg * 5677ec681f3Smrg * We can assume that the type will match with the framebuffer format: 5687ec681f3Smrg * 5697ec681f3Smrg * Page 170 of the PDF of the OpenGL ES 3.0.6 spec says: 5707ec681f3Smrg * 5717ec681f3Smrg * If [UNORM or SNORM, convert to fixed-point]; otherwise no type 5727ec681f3Smrg * conversion is applied. If the values written by the fragment shader 5737ec681f3Smrg * do not match the format(s) of the corresponding color buffer(s), 5747ec681f3Smrg * the result is undefined. 5757ec681f3Smrg */ 5767ec681f3Smrg 5777ec681f3Smrg unsigned bits = nir_dest_bit_size(intr->dest); 5787ec681f3Smrg 5797ec681f3Smrg nir_alu_type src_type = nir_alu_type_get_base_type( 5807ec681f3Smrg pan_unpacked_type_for_format(desc)); 5817ec681f3Smrg 5827ec681f3Smrg unpacked = nir_convert_to_bit_size(b, unpacked, src_type, bits); 5837ec681f3Smrg unpacked = nir_pad_vector(b, unpacked, nir_dest_num_components(intr->dest)); 5847ec681f3Smrg 5857ec681f3Smrg /* Reorder the components */ 5867ec681f3Smrg if (reorder_comps) 5877ec681f3Smrg unpacked = pan_unpack_reorder(b, desc, unpacked); 5887ec681f3Smrg 5897ec681f3Smrg nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, unpacked, &intr->instr); 5907ec681f3Smrg} 5917ec681f3Smrg 5927ec681f3Smrgbool 5937ec681f3Smrgpan_lower_framebuffer(nir_shader *shader, const enum pipe_format *rt_fmts, 5947ec681f3Smrg uint8_t raw_fmt_mask, bool is_blend, unsigned quirks) 5957ec681f3Smrg{ 5967ec681f3Smrg if (shader->info.stage != MESA_SHADER_FRAGMENT) 5977ec681f3Smrg return false; 5987ec681f3Smrg 5997ec681f3Smrg bool progress = false; 6007ec681f3Smrg 6017ec681f3Smrg nir_foreach_function(func, shader) { 6027ec681f3Smrg nir_foreach_block(block, func->impl) { 6037ec681f3Smrg nir_foreach_instr_safe(instr, block) { 6047ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 6057ec681f3Smrg continue; 6067ec681f3Smrg 6077ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 6087ec681f3Smrg 6097ec681f3Smrg bool is_load = intr->intrinsic == 
nir_intrinsic_load_deref; 6107ec681f3Smrg bool is_store = intr->intrinsic == nir_intrinsic_store_deref; 6117ec681f3Smrg 6127ec681f3Smrg if (!(is_load || (is_store && is_blend))) 6137ec681f3Smrg continue; 6147ec681f3Smrg 6157ec681f3Smrg nir_variable *var = nir_intrinsic_get_var(intr, 0); 6167ec681f3Smrg 6177ec681f3Smrg if (var->data.mode != nir_var_shader_out) 6187ec681f3Smrg continue; 6197ec681f3Smrg 6207ec681f3Smrg if (var->data.location < FRAG_RESULT_DATA0) 6217ec681f3Smrg continue; 6227ec681f3Smrg 6237ec681f3Smrg unsigned base = var->data.driver_location; 6247ec681f3Smrg unsigned rt = var->data.location - FRAG_RESULT_DATA0; 6257ec681f3Smrg 6267ec681f3Smrg if (rt_fmts[rt] == PIPE_FORMAT_NONE) 6277ec681f3Smrg continue; 6287ec681f3Smrg 6297ec681f3Smrg const struct util_format_description *desc = 6307ec681f3Smrg util_format_description(rt_fmts[rt]); 6317ec681f3Smrg 6327ec681f3Smrg enum pan_format_class fmt_class = 6337ec681f3Smrg pan_format_class(desc, quirks, is_store); 6347ec681f3Smrg 6357ec681f3Smrg /* Don't lower */ 6367ec681f3Smrg if (fmt_class == PAN_FORMAT_NATIVE) 6377ec681f3Smrg continue; 6387ec681f3Smrg 6397ec681f3Smrg /* EXT_shader_framebuffer_fetch requires 6407ec681f3Smrg * per-sample loads. 6417ec681f3Smrg * MSAA blend shaders are not yet handled, so 6427ec681f3Smrg * for now always load sample 0. */ 6437ec681f3Smrg int sample = is_blend ? 
0 : -1; 6447ec681f3Smrg bool reorder_comps = raw_fmt_mask & BITFIELD_BIT(rt); 6457ec681f3Smrg 6467ec681f3Smrg nir_builder b; 6477ec681f3Smrg nir_builder_init(&b, func->impl); 6487ec681f3Smrg 6497ec681f3Smrg if (is_store) { 6507ec681f3Smrg b.cursor = nir_before_instr(instr); 6517ec681f3Smrg pan_lower_fb_store(shader, &b, intr, desc, reorder_comps, quirks); 6527ec681f3Smrg } else { 6537ec681f3Smrg b.cursor = nir_after_instr(instr); 6547ec681f3Smrg pan_lower_fb_load(shader, &b, intr, desc, reorder_comps, base, sample, quirks); 6557ec681f3Smrg } 6567ec681f3Smrg 6577ec681f3Smrg nir_instr_remove(instr); 6587ec681f3Smrg 6597ec681f3Smrg progress = true; 6607ec681f3Smrg } 6617ec681f3Smrg } 6627ec681f3Smrg 6637ec681f3Smrg nir_metadata_preserve(func->impl, nir_metadata_block_index | 6647ec681f3Smrg nir_metadata_dominance); 6657ec681f3Smrg } 6667ec681f3Smrg 6677ec681f3Smrg return progress; 6687ec681f3Smrg} 669