1b8e80941Smrg/*
2b8e80941Smrg * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b8e80941Smrg * SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg * Authors:
24b8e80941Smrg *    Rob Clark <robclark@freedesktop.org>
25b8e80941Smrg */
26b8e80941Smrg
27b8e80941Smrg
28b8e80941Smrg#include "util/u_math.h"
29b8e80941Smrg
30b8e80941Smrg#include "ir3.h"
31b8e80941Smrg
32b8e80941Smrg/*
33b8e80941Smrg * A simple pass to do Sethi–Ullman numbering, as described in "Generalizations
34b8e80941Smrg * of the Sethi-Ullman algorithm for register allocation"[1].  This is used by
35b8e80941Smrg * the scheduler pass.
36b8e80941Smrg *
37b8e80941Smrg * TODO this could probably be more clever about flow control, ie. if a src
38b8e80941Smrg * is computed in multiple paths into a block, I think we should only have to
39b8e80941Smrg * consider the worst-case.
40b8e80941Smrg *
41b8e80941Smrg * [1] https://pdfs.semanticscholar.org/ae53/6010b214612c2571f483354c264b0b39c545.pdf
42b8e80941Smrg */
43b8e80941Smrg
44b8e80941Smrgstatic unsigned
45b8e80941Smrgnumber_instr(struct ir3_instruction *instr)
46b8e80941Smrg{
47b8e80941Smrg	if (ir3_instr_check_mark(instr))
48b8e80941Smrg		return instr->sun;
49b8e80941Smrg
50b8e80941Smrg	struct ir3_instruction *src;
51b8e80941Smrg	const unsigned n = __ssa_src_cnt(instr);
52b8e80941Smrg	unsigned a[n];
53b8e80941Smrg	unsigned b[n];
54b8e80941Smrg	unsigned i = 0;
55b8e80941Smrg
56b8e80941Smrg	/* TODO I think including false-deps in the calculation is the right
57b8e80941Smrg	 * thing to do:
58b8e80941Smrg	 */
59b8e80941Smrg	foreach_ssa_src_n(src, n, instr) {
60b8e80941Smrg		if (__is_false_dep(instr, n))
61b8e80941Smrg			continue;
62b8e80941Smrg		if (src->block != instr->block) {
63b8e80941Smrg			a[i] = 1;
64b8e80941Smrg		} else {
65b8e80941Smrg			a[i] = number_instr(src);
66b8e80941Smrg		}
67b8e80941Smrg		b[i] = dest_regs(src);
68b8e80941Smrg		i++;
69b8e80941Smrg	}
70b8e80941Smrg
71b8e80941Smrg	/*
72b8e80941Smrg	 * Rπ = max(aπ(1), bπ(1) + max(aπ(2), bπ(2) + max(..., bπ(k−1) + max(aπ(k), bπ(k)))...):
73b8e80941Smrg	 */
74b8e80941Smrg	unsigned last_r = 0;
75b8e80941Smrg
76b8e80941Smrg	for (int k = i - 1; k >= 0; k--) {
77b8e80941Smrg		unsigned r = MAX2(a[k], b[k] + last_r);
78b8e80941Smrg
79b8e80941Smrg		if (k > 0)
80b8e80941Smrg			r += b[k-1];
81b8e80941Smrg
82b8e80941Smrg		last_r = r;
83b8e80941Smrg	}
84b8e80941Smrg
85b8e80941Smrg	last_r = MAX2(last_r, dest_regs(instr));
86b8e80941Smrg
87b8e80941Smrg	instr->sun = last_r;
88b8e80941Smrg
89b8e80941Smrg	return instr->sun;
90b8e80941Smrg}
91b8e80941Smrg
92b8e80941Smrgvoid
93b8e80941Smrgir3_sun(struct ir3 *ir)
94b8e80941Smrg{
95b8e80941Smrg	unsigned max = 0;
96b8e80941Smrg
97b8e80941Smrg	ir3_clear_mark(ir);
98b8e80941Smrg
99b8e80941Smrg	for (unsigned i = 0; i < ir->noutputs; i++)
100b8e80941Smrg		if (ir->outputs[i])
101b8e80941Smrg			max = MAX2(max, number_instr(ir->outputs[i]));
102b8e80941Smrg
103b8e80941Smrg	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
104b8e80941Smrg		for (unsigned i = 0; i < block->keeps_count; i++)
105b8e80941Smrg			max = MAX2(max, number_instr(block->keeps[i]));
106b8e80941Smrg		if (block->condition)
107b8e80941Smrg			max = MAX2(max, number_instr(block->condition));
108b8e80941Smrg	}
109b8e80941Smrg
110b8e80941Smrg	ir->max_sun = max;
111b8e80941Smrg}
112