/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ir3_ra.h"

/* The spilling pass leaves out a few details required for the resulting
 * ldp/stp instructions to actually work:
 *
 * 1. ldp/stp can only load/store up to 4 components at a time, but the
 *    spiller ignores that and just spills/restores entire values, including
 *    arrays and values created for texture setup, which can be larger than
 *    4 components.
 * 2. The spiller doesn't add the barrier dependencies needed for post-RA
 *    scheduling.
 *
 * The first issue, in particular, is much easier to handle after RA because
 * arrays and normal values can then be treated the same way. Therefore this
 * pass runs after RA and handles both issues, which keeps the complexity out
 * of the spiller.
 */
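
/* For example (illustrative pseudo-IR, not exact ir3 disassembly), an
 * 8-component full-register spill macro
 *
 *    spill.macro [off], r0.x, 8
 *
 * is split into two 4-component stores, with the source register and the
 * scratch byte offset (4 components * 4 bytes = 16) stepped for the second
 * piece:
 *
 *    stp [off], r0.x, 4
 *    stp [off+16], r1.x, 4
 */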
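/* Split a spill wider than 4 components into stp-sized pieces, and rewrite
 * array sources into the plain register sources RA assigned them.
 */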
static void
split_spill(struct ir3_instruction *spill)
{
   unsigned orig_components = spill->srcs[2]->uim_val;

   /* We don't handle splitting dependencies. */
   assert(spill->deps_count == 0);

   if (orig_components <= 4) {
      /* Small enough for a single stp: just rewrite an array source into a
       * plain register source.
       */
      if (spill->srcs[1]->flags & IR3_REG_ARRAY) {
         spill->srcs[1]->wrmask = MASK(orig_components);
         spill->srcs[1]->num = spill->srcs[1]->array.base;
         spill->srcs[1]->flags &= ~IR3_REG_ARRAY;
      }
      return;
   }

   for (unsigned comp = 0; comp < orig_components; comp += 4) {
      unsigned components = MIN2(orig_components - comp, 4);
      struct ir3_instruction *clone = ir3_instr_clone(spill);
      ir3_instr_move_before(clone, spill);

      clone->srcs[1]->wrmask = MASK(components);
      if (clone->srcs[1]->flags & IR3_REG_ARRAY) {
         clone->srcs[1]->num = clone->srcs[1]->array.base + comp;
         clone->srcs[1]->flags &= ~IR3_REG_ARRAY;
      } else {
         /* Non-array values live in consecutive registers after RA, so step
          * the base register along with the component offset.
          */
         clone->srcs[1]->num += comp;
      }

      clone->srcs[2]->uim_val = components;
      /* dst_offset is in bytes: 2 per half-reg component, 4 per full. */
      clone->cat6.dst_offset +=
         comp * ((spill->srcs[1]->flags & IR3_REG_HALF) ? 2 : 4);
   }

   list_delinit(&spill->node);
}
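/* Same as split_spill, but for reloads: split into ldp-sized pieces and
 * rewrite array destinations into plain register destinations.
 */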
static void
split_reload(struct ir3_instruction *reload)
{
   unsigned orig_components = reload->srcs[2]->uim_val;

   /* As with spills, we don't handle splitting dependencies. */
   assert(reload->deps_count == 0);

   if (orig_components <= 4) {
      if (reload->dsts[0]->flags & IR3_REG_ARRAY) {
         reload->dsts[0]->wrmask = MASK(orig_components);
         reload->dsts[0]->num = reload->dsts[0]->array.base;
         reload->dsts[0]->flags &= ~IR3_REG_ARRAY;
      }
      return;
   }

   for (unsigned comp = 0; comp < orig_components; comp += 4) {
      unsigned components = MIN2(orig_components - comp, 4);
      struct ir3_instruction *clone = ir3_instr_clone(reload);
      ir3_instr_move_before(clone, reload);

      clone->dsts[0]->wrmask = MASK(components);
      if (clone->dsts[0]->flags & IR3_REG_ARRAY) {
         clone->dsts[0]->num = clone->dsts[0]->array.base + comp;
         clone->dsts[0]->flags &= ~IR3_REG_ARRAY;
      } else {
         /* Non-array destinations occupy consecutive registers. */
         clone->dsts[0]->num += comp;
      }

      clone->srcs[2]->uim_val = components;
      /* srcs[1] holds the scratch offset in bytes: 2 per half-reg component,
       * 4 per full.
       */
      clone->srcs[1]->uim_val +=
         comp * ((reload->dsts[0]->flags & IR3_REG_HALF) ? 2 : 4);
   }

   list_delinit(&reload->node);
}
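/* The post-RA scheduler only knows about register dependencies, so the
 * ordering of scratch accesses has to be made explicit. Conservatively treat
 * every spill/reload in the block as potentially aliasing every other one,
 * and add store->load, load->store, and store->store dependencies in two
 * linear passes. Load/load pairs don't conflict and are left unordered.
 */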
static void
add_spill_reload_deps(struct ir3_block *block)
{
   struct ir3_instruction *last_spill = NULL;

   /* Forward pass: every scratch access must stay after the most recent
    * store preceding it.
    */
   foreach_instr (instr, &block->instr_list) {
      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
          last_spill) {
         ir3_instr_add_dep(instr, last_spill);
      }

      if (instr->opc == OPC_SPILL_MACRO)
         last_spill = instr;
   }

   last_spill = NULL;

   /* Backward pass: every store must stay after any scratch access preceding
    * it.
    */
   foreach_instr_rev (instr, &block->instr_list) {
      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
          last_spill) {
         ir3_instr_add_dep(last_spill, instr);
      }

      if (instr->opc == OPC_SPILL_MACRO)
         last_spill = instr;
   }
}
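/* Entry point: split over-wide spills/reloads, add the scheduling
 * dependencies, and then rewrite the macros into real stp/ldp instructions.
 */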
bool
ir3_lower_spill(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      /* Split before adding dependencies, since split_spill()/split_reload()
       * assume the instructions they clone have none.
       */
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_SPILL_MACRO)
            split_spill(instr);
         else if (instr->opc == OPC_RELOAD_MACRO)
            split_reload(instr);
      }

      add_spill_reload_deps(block);

      foreach_instr (instr, &block->instr_list) {
         if (instr->opc == OPC_SPILL_MACRO)
            instr->opc = OPC_STP;
         else if (instr->opc == OPC_RELOAD_MACRO)
            instr->opc = OPC_LDP;
      }
   }

   return true;
}