1/* 2 * Copyright (C) 2019 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors (Collabora): 24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25 */ 26 27#include <stdio.h> 28#include <assert.h> 29#include <stdlib.h> 30#include <string.h> 31#include <limits.h> 32#include "util/macros.h" 33#include "util/u_math.h" 34#include "lcra.h" 35 36/* This module is the reference implementation of "Linearly Constrained 37 * Register Allocation". The paper is available in PDF form 38 * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX 39 * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md) 40 */ 41 42struct lcra_state * 43lcra_alloc_equations( 44 unsigned node_count, unsigned class_count) 45{ 46 struct lcra_state *l = calloc(1, sizeof(*l)); 47 48 l->node_count = node_count; 49 l->class_count = class_count; 50 51 l->alignment = calloc(sizeof(l->alignment[0]), node_count); 52 l->linear = calloc(sizeof(l->linear[0]), node_count * node_count); 53 l->modulus = calloc(sizeof(l->modulus[0]), node_count); 54 l->class = calloc(sizeof(l->class[0]), node_count); 55 l->class_start = calloc(sizeof(l->class_start[0]), class_count); 56 l->class_disjoint = calloc(sizeof(l->class_disjoint[0]), class_count * class_count); 57 l->class_size = calloc(sizeof(l->class_size[0]), class_count); 58 l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count); 59 l->solutions = calloc(sizeof(l->solutions[0]), node_count); 60 61 memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count); 62 63 return l; 64} 65 66void 67lcra_free(struct lcra_state *l) 68{ 69 if (!l) 70 return; 71 72 free(l->alignment); 73 free(l->linear); 74 free(l->modulus); 75 free(l->class); 76 free(l->class_start); 77 free(l->class_disjoint); 78 free(l->class_size); 79 free(l->spill_cost); 80 free(l->solutions); 81 82 free(l); 83} 84 85void 86lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound) 87{ 88 l->alignment[node] = (align_log2 + 1) | (bound << 16); 89} 90 91void 92lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2) 93{ 94 l->class_disjoint[(c1 * l->class_count) + c2] = true; 95 l->class_disjoint[(c2 * l->class_count) + c1] = true; 96} 97 98void 99lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len) 100{ 101 if (node < l->node_count && l->alignment[node]) { 102 unsigned BA = l->alignment[node]; 103 unsigned alignment = (BA & 0xffff) - 1; 104 unsigned bound = BA >> 16; 105 l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment); 106 } 107} 108 109void 110lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j) 111{ 112 if (i == j) 113 return; 114 115 if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]]) 116 return; 117 118 uint32_t constraint_fw = 0; 119 uint32_t constraint_bw = 0; 120 121 for (unsigned D = 0; D < 16; ++D) { 122 if (cmask_i & (cmask_j << D)) { 123 constraint_bw |= (1 << (15 + D)); 124 constraint_fw |= (1 << (15 - D)); 125 } 126 127 if (cmask_i & (cmask_j >> D)) { 128 constraint_fw |= (1 << (15 + D)); 129 constraint_bw |= (1 << (15 - D)); 130 } 131 } 132 133 l->linear[j * l->node_count + i] |= constraint_fw; 134 l->linear[i * l->node_count + j] |= constraint_bw; 135} 136 137static bool 138lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i) 139{ 140 unsigned *row = &l->linear[i * l->node_count]; 141 signed constant = solutions[i]; 142 143 for (unsigned j = 0; j < l->node_count; ++j) { 144 if (solutions[j] == ~0) continue; 145 146 signed lhs = solutions[j] - constant; 147 148 if (lhs < -15 || lhs > 15) 149 continue; 150 151 if (row[j] & (1 << (lhs + 15))) 152 return false; 153 } 154 155 return true; 156} 157 158bool 159lcra_solve(struct lcra_state *l) 160{ 161 for (unsigned step = 0; step < l->node_count; ++step) { 162 if (l->solutions[step] != ~0) continue; 163 if (l->alignment[step] == 0) continue; 164 165 unsigned _class = l->class[step]; 166 unsigned class_start = l->class_start[_class]; 167 168 unsigned BA = l->alignment[step]; 169 unsigned shift = (BA & 0xffff) - 1; 170 unsigned bound = BA >> 16; 171 172 unsigned P = bound >> shift; 173 unsigned Q = l->modulus[step]; 174 unsigned r_max = l->class_size[_class]; 175 unsigned k_max = r_max >> shift; 176 unsigned m_max = k_max / P; 177 bool succ = false; 178 179 for (unsigned m = 0; m < m_max; ++m) { 180 for (unsigned n = 0; n < Q; ++n) { 181 l->solutions[step] = ((m * P + n) << shift) + class_start; 182 succ = lcra_test_linear(l, l->solutions, step); 183 184 if (succ) break; 185 } 186 187 if (succ) break; 188 } 189 190 /* Out of registers - prepare to spill */ 191 if (!succ) { 192 l->spill_class = l->class[step]; 193 return false; 194 } 195 } 196 197 return true; 198} 199 200/* Register spilling is implemented with a cost-benefit system. Costs are set 201 * by the user. Benefits are calculated from the constraints. */ 202 203void 204lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost) 205{ 206 if (node < l->node_count) 207 l->spill_cost[node] = cost; 208} 209 210static unsigned 211lcra_count_constraints(struct lcra_state *l, unsigned i) 212{ 213 unsigned count = 0; 214 unsigned *constraints = &l->linear[i * l->node_count]; 215 216 for (unsigned j = 0; j < l->node_count; ++j) 217 count += util_bitcount(constraints[j]); 218 219 return count; 220} 221 222signed 223lcra_get_best_spill_node(struct lcra_state *l) 224{ 225 /* If there are no constraints on a node, do not pick it to spill under 226 * any circumstance, or else we would hang rather than fail RA */ 227 float best_benefit = 0.0; 228 signed best_node = -1; 229 230 for (unsigned i = 0; i < l->node_count; ++i) { 231 /* Find spillable nodes */ 232 if (l->class[i] != l->spill_class) continue; 233 if (l->spill_cost[i] < 0) continue; 234 235 /* Adapted from Chaitin's heuristic */ 236 float constraints = lcra_count_constraints(l, i); 237 float cost = (l->spill_cost[i] + 1); 238 float benefit = constraints / cost; 239 240 if (benefit > best_benefit) { 241 best_benefit = benefit; 242 best_node = i; 243 } 244 } 245 246 return best_node; 247} 248