1/* 2 * Copyright (C) 2021 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "compiler.h" 25#include "bi_builder.h" 26 27/* This optimization pass, intended to run once after code emission but before 28 * copy propagation, analyzes direct word-aligned UBO reads and promotes a 29 * subset to moves from FAU. It is the sole populator of the UBO push data 30 * structure returned back to the command stream. */ 31 32static bool 33bi_is_ubo(bi_instr *ins) 34{ 35 return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) && 36 (ins->seg == BI_SEG_UBO); 37} 38 39static bool 40bi_is_direct_aligned_ubo(bi_instr *ins) 41{ 42 return bi_is_ubo(ins) && 43 (ins->src[0].type == BI_INDEX_CONSTANT) && 44 (ins->src[1].type == BI_INDEX_CONSTANT) && 45 ((ins->src[0].value & 0x3) == 0); 46} 47 48/* Represents use data for a single UBO */ 49 50#define MAX_UBO_WORDS (65536 / 16) 51 52struct bi_ubo_block { 53 BITSET_DECLARE(pushed, MAX_UBO_WORDS); 54 uint8_t range[MAX_UBO_WORDS]; 55}; 56 57struct bi_ubo_analysis { 58 /* Per block analysis */ 59 unsigned nr_blocks; 60 struct bi_ubo_block *blocks; 61}; 62 63static struct bi_ubo_analysis 64bi_analyze_ranges(bi_context *ctx) 65{ 66 struct bi_ubo_analysis res = { 67 .nr_blocks = ctx->nir->info.num_ubos + 1, 68 }; 69 70 res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block)); 71 72 bi_foreach_instr_global(ctx, ins) { 73 if (!bi_is_direct_aligned_ubo(ins)) continue; 74 75 unsigned ubo = ins->src[1].value; 76 unsigned word = ins->src[0].value / 4; 77 unsigned channels = bi_opcode_props[ins->op].sr_count; 78 79 assert(ubo < res.nr_blocks); 80 assert(channels > 0 && channels <= 4); 81 82 if (word >= MAX_UBO_WORDS) continue; 83 84 /* Must use max if the same base is read with different channel 85 * counts, which is possible with nir_opt_shrink_vectors */ 86 uint8_t *range = res.blocks[ubo].range; 87 range[word] = MAX2(range[word], channels); 88 } 89 90 return res; 91} 92 93/* Select UBO words to push. A sophisticated implementation would consider the 94 * number of uses and perhaps the control flow to estimate benefit. This is not 95 * sophisticated. Select from the last UBO first to prioritize sysvals. */ 96 97static void 98bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis) 99{ 100 for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) { 101 struct bi_ubo_block *block = &analysis->blocks[ubo]; 102 103 for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) { 104 unsigned range = block->range[r]; 105 106 /* Don't push something we don't access */ 107 if (range == 0) continue; 108 109 /* Don't push more than possible */ 110 if (push->count > PAN_MAX_PUSH - range) 111 return; 112 113 for (unsigned offs = 0; offs < range; ++offs) { 114 struct panfrost_ubo_word word = { 115 .ubo = ubo, 116 .offset = (r + offs) * 4 117 }; 118 119 push->words[push->count++] = word; 120 } 121 122 /* Mark it as pushed so we can rewrite */ 123 BITSET_SET(block->pushed, r); 124 } 125 } 126} 127 128void 129bi_opt_push_ubo(bi_context *ctx) 130{ 131 /* This pass only runs once */ 132 assert(ctx->info->push.count == 0); 133 134 struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx); 135 bi_pick_ubo(&ctx->info->push, &analysis); 136 137 ctx->ubo_mask = 0; 138 139 bi_foreach_instr_global_safe(ctx, ins) { 140 if (!bi_is_ubo(ins)) continue; 141 142 unsigned ubo = ins->src[1].value; 143 unsigned offset = ins->src[0].value; 144 145 if (!bi_is_direct_aligned_ubo(ins)) { 146 /* The load can't be pushed, so this UBO needs to be 147 * uploaded conventionally */ 148 if (ins->src[1].type == BI_INDEX_CONSTANT) 149 ctx->ubo_mask |= BITSET_BIT(ubo); 150 else 151 ctx->ubo_mask = ~0; 152 153 continue; 154 } 155 156 /* Check if we decided to push this */ 157 assert(ubo < analysis.nr_blocks); 158 if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) { 159 ctx->ubo_mask |= BITSET_BIT(ubo); 160 continue; 161 } 162 163 /* Replace the UBO load with moves from FAU */ 164 bi_builder b = bi_init_builder(ctx, bi_after_instr(ins)); 165 166 unsigned channels = bi_opcode_props[ins->op].sr_count; 167 168 for (unsigned w = 0; w < channels; ++w) { 169 /* FAU is grouped in pairs (2 x 4-byte) */ 170 unsigned base = 171 pan_lookup_pushed_ubo(&ctx->info->push, ubo, 172 (offset + 4 * w)); 173 174 unsigned fau_idx = (base >> 1); 175 unsigned fau_hi = (base & 1); 176 177 bi_mov_i32_to(&b, 178 bi_word(ins->dest[0], w), 179 bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi)); 180 } 181 182 bi_remove_instruction(ins); 183 } 184 185 free(analysis.blocks); 186} 187