1/*
2 * Copyright (C) 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27#include <stdio.h>
28#include <assert.h>
29#include <stdlib.h>
30#include <string.h>
31#include <limits.h>
32#include "util/macros.h"
33#include "util/u_math.h"
34#include "lcra.h"
35
36/* This module is the reference implementation of "Linearly Constrained
37 * Register Allocation". The paper is available in PDF form
38 * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
39 * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
40 */
41
42struct lcra_state *
43lcra_alloc_equations(
44                unsigned node_count, unsigned class_count)
45{
46        struct lcra_state *l = calloc(1, sizeof(*l));
47
48        l->node_count = node_count;
49        l->class_count = class_count;
50
51        l->alignment = calloc(sizeof(l->alignment[0]), node_count);
52        l->linear = calloc(sizeof(l->linear[0]), node_count * node_count);
53        l->modulus = calloc(sizeof(l->modulus[0]), node_count);
54        l->class = calloc(sizeof(l->class[0]), node_count);
55        l->class_start = calloc(sizeof(l->class_start[0]), class_count);
56        l->class_disjoint = calloc(sizeof(l->class_disjoint[0]), class_count * class_count);
57        l->class_size = calloc(sizeof(l->class_size[0]), class_count);
58        l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count);
59        l->solutions = calloc(sizeof(l->solutions[0]), node_count);
60
61        memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
62
63        return l;
64}
65
66void
67lcra_free(struct lcra_state *l)
68{
69        if (!l)
70                return;
71
72        free(l->alignment);
73        free(l->linear);
74        free(l->modulus);
75        free(l->class);
76        free(l->class_start);
77        free(l->class_disjoint);
78        free(l->class_size);
79        free(l->spill_cost);
80        free(l->solutions);
81
82        free(l);
83}
84
85void
86lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound)
87{
88        l->alignment[node] = (align_log2 + 1) | (bound << 16);
89}
90
91void
92lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2)
93{
94        l->class_disjoint[(c1 * l->class_count) + c2] = true;
95        l->class_disjoint[(c2 * l->class_count) + c1] = true;
96}
97
98void
99lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len)
100{
101        if (node < l->node_count && l->alignment[node]) {
102                unsigned BA = l->alignment[node];
103                unsigned alignment = (BA & 0xffff) - 1;
104                unsigned bound = BA >> 16;
105                l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment);
106        }
107}
108
109void
110lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j)
111{
112        if (i == j)
113                return;
114
115        if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]])
116                return;
117
118        uint32_t constraint_fw = 0;
119        uint32_t constraint_bw = 0;
120
121        for (unsigned D = 0; D < 16; ++D) {
122                if (cmask_i & (cmask_j << D)) {
123                        constraint_bw |= (1 << (15 + D));
124                        constraint_fw |= (1 << (15 - D));
125                }
126
127                if (cmask_i & (cmask_j >> D)) {
128                        constraint_fw |= (1 << (15 + D));
129                        constraint_bw |= (1 << (15 - D));
130                }
131        }
132
133        l->linear[j * l->node_count + i] |= constraint_fw;
134        l->linear[i * l->node_count + j] |= constraint_bw;
135}
136
137static bool
138lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
139{
140        unsigned *row = &l->linear[i * l->node_count];
141        signed constant = solutions[i];
142
143        for (unsigned j = 0; j < l->node_count; ++j) {
144                if (solutions[j] == ~0) continue;
145
146                signed lhs = solutions[j] - constant;
147
148                if (lhs < -15 || lhs > 15)
149                        continue;
150
151                if (row[j] & (1 << (lhs + 15)))
152                        return false;
153        }
154
155        return true;
156}
157
158bool
159lcra_solve(struct lcra_state *l)
160{
161        for (unsigned step = 0; step < l->node_count; ++step) {
162                if (l->solutions[step] != ~0) continue;
163                if (l->alignment[step] == 0) continue;
164
165                unsigned _class = l->class[step];
166                unsigned class_start = l->class_start[_class];
167
168                unsigned BA = l->alignment[step];
169                unsigned shift = (BA & 0xffff) - 1;
170                unsigned bound = BA >> 16;
171
172                unsigned P = bound >> shift;
173                unsigned Q = l->modulus[step];
174                unsigned r_max = l->class_size[_class];
175                unsigned k_max = r_max >> shift;
176                unsigned m_max = k_max / P;
177                bool succ = false;
178
179                for (unsigned m = 0; m < m_max; ++m) {
180                        for (unsigned n = 0; n < Q; ++n) {
181                                l->solutions[step] = ((m * P + n) << shift) + class_start;
182                                succ = lcra_test_linear(l, l->solutions, step);
183
184                                if (succ) break;
185                        }
186
187                        if (succ) break;
188                }
189
190                /* Out of registers - prepare to spill */
191                if (!succ) {
192                        l->spill_class = l->class[step];
193                        return false;
194                }
195        }
196
197        return true;
198}
199
200/* Register spilling is implemented with a cost-benefit system. Costs are set
201 * by the user. Benefits are calculated from the constraints. */
202
203void
204lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost)
205{
206        if (node < l->node_count)
207                l->spill_cost[node] = cost;
208}
209
210static unsigned
211lcra_count_constraints(struct lcra_state *l, unsigned i)
212{
213        unsigned count = 0;
214        unsigned *constraints = &l->linear[i * l->node_count];
215
216        for (unsigned j = 0; j < l->node_count; ++j)
217                count += util_bitcount(constraints[j]);
218
219        return count;
220}
221
222signed
223lcra_get_best_spill_node(struct lcra_state *l)
224{
225        /* If there are no constraints on a node, do not pick it to spill under
226         * any circumstance, or else we would hang rather than fail RA */
227        float best_benefit = 0.0;
228        signed best_node = -1;
229
230        for (unsigned i = 0; i < l->node_count; ++i) {
231                /* Find spillable nodes */
232                if (l->class[i] != l->spill_class) continue;
233                if (l->spill_cost[i] < 0) continue;
234
235                /* Adapted from Chaitin's heuristic */
236                float constraints = lcra_count_constraints(l, i);
237                float cost = (l->spill_cost[i] + 1);
238                float benefit = constraints / cost;
239
240                if (benefit > best_benefit) {
241                        best_benefit = benefit;
242                        best_node = i;
243                }
244        }
245
246        return best_node;
247}
248