/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "util/bitscan.h"
#include "util/ralloc.h"

#include "ppir.h"

/* Lower a const node: dead consts are deleted; a const feeding a single
 * ALU/branch successor is converted to the ^const0 pipeline register so the
 * consumer reads it directly; any other consumer gets a mov inserted that
 * reads the const through ^const0.
 *
 * Returns true on success, false on allocation failure.
 */
static bool ppir_lower_const(ppir_block *block, ppir_node *node)
{
   /* A const with no successors is dead code — drop it. */
   if (ppir_node_is_root(node)) {
      ppir_node_delete(node);
      return true;
   }

   assert(ppir_node_has_single_succ(node));

   ppir_node *succ = ppir_node_first_succ(node);
   ppir_dest *dest = ppir_node_get_dest(node);

   switch (succ->type) {
   case ppir_node_type_alu:
   case ppir_node_type_branch:
      /* ALU and branch can consume consts directly */
      dest->type = ppir_target_pipeline;
      /* Reg will be updated in node_to_instr later */
      dest->pipeline = ppir_pipeline_reg_const0;

      /* single succ can still have multiple references to this node */
      for (int i = 0; i < ppir_node_get_src_num(succ); i++) {
         ppir_src *src = ppir_node_get_src(succ, i);
         if (src && src->node == node) {
            src->type = ppir_target_pipeline;
            src->pipeline = ppir_pipeline_reg_const0;
         }
      }
      return true;
   default:
      /* Create a move for everyone else */
      break;
   }

   ppir_node *move = ppir_node_insert_mov(node);
   if (unlikely(!move))
      return false;

   ppir_debug("lower const create move %d for %d\n",
              move->index, node->index);

   /* Need to be careful with changing src/dst type here:
    * it has to be done *after* successors have their children
    * replaced, otherwise ppir_node_replace_child() won't find
    * matching src/dst and as result won't work
    */
   ppir_src *mov_src = ppir_node_get_src(move, 0);
   mov_src->type = dest->type = ppir_target_pipeline;
   mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_const0;

   return true;
}

/* Lower an op by swapping its two sources and bumping node->op to the
 * "swapped" variant.  Relies on the ppir_op enum layout placing the swapped
 * op immediately after the original (used for lt->gt, le->ge — see the
 * dispatch table below); TODO confirm the enum ordering in ppir.h.
 */
static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
{
   /* swapped op must be the next op */
   node->op++;

   assert(node->type == ppir_node_type_alu);
   ppir_alu_node *alu = ppir_node_to_alu(node);
   assert(alu->num_src == 2);

   ppir_src tmp = alu->src[0];
   alu->src[0] = alu->src[1];
   alu->src[1] = tmp;
   return true;
}

/* Lower a uniform/temp load: dead SSA loads are deleted; a load whose single
 * source-successor (or a root whose dest is not a register) is an ALU/branch
 * is rewritten to feed it through the ^uniform pipeline register; everything
 * else gets a mov inserted that reads through ^uniform.
 *
 * Returns true on success, false on allocation failure.
 */
static bool ppir_lower_load(ppir_block *block, ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);
   /* A rootless SSA load has no consumer — drop it. */
   if (ppir_node_is_root(node) && dest->type == ppir_target_ssa) {
      ppir_node_delete(node);
      return true;
   }

   /* load can have multiple successors in case if we duplicated load node
    * that has load node in source
    */
   if ((ppir_node_has_single_src_succ(node) || ppir_node_is_root(node)) &&
       dest->type != ppir_target_register) {
      ppir_node *succ = ppir_node_first_succ(node);
      switch (succ->type) {
      case ppir_node_type_alu:
      case ppir_node_type_branch: {
         /* single succ can still have multiple references to this node */
         for (int i = 0; i < ppir_node_get_src_num(succ); i++) {
            ppir_src *src = ppir_node_get_src(succ, i);
            if (src && src->node == node) {
               /* Can consume uniforms directly */
               src->type = dest->type = ppir_target_pipeline;
               src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform;
            }
         }
         return true;
      }
      default:
         /* Create mov for everyone else */
         break;
      }
   }

   ppir_node *move = ppir_node_insert_mov(node);
   if (unlikely(!move))
      return false;

   ppir_src *mov_src = ppir_node_get_src(move, 0);
   mov_src->type = dest->type = ppir_target_pipeline;
   mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform;

   return true;
}

/* Lower ddx/ddy to a two-source form: src[1] is a copy of src[0] with one of
 * the two sources negated (src[1] for ddx, src[0] for ddy), matching how the
 * hardware derivative op consumes its operands — TODO confirm against the PP
 * ISA docs.
 */
static bool ppir_lower_ddxy(ppir_block *block, ppir_node *node)
{
   assert(node->type == ppir_node_type_alu);
   ppir_alu_node *alu = ppir_node_to_alu(node);

   alu->src[1] = alu->src[0];
   if (node->op == ppir_op_ddx)
      alu->src[1].negate = !alu->src[1].negate;
   else if (node->op == ppir_op_ddy)
      alu->src[0].negate = !alu->src[0].negate;
   else
      assert(0);

   alu->num_src = 2;

   return true;
}

/* Lower a texture load: a single-successor SSA result is routed to the
 * consumer through the ^sampler pipeline register; otherwise a mov is
 * inserted that reads the result through ^sampler.
 *
 * Returns true on success, false on allocation failure.
 */
static bool ppir_lower_texture(ppir_block *block, ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);

   if (ppir_node_has_single_succ(node) && dest->type == ppir_target_ssa) {
      ppir_node *succ = ppir_node_first_succ(node);
      dest->type = ppir_target_pipeline;
      dest->pipeline = ppir_pipeline_reg_sampler;

      /* The single successor may reference this node through several of
       * its sources — update all of them. */
      for (int i = 0; i < ppir_node_get_src_num(succ); i++) {
         ppir_src *src = ppir_node_get_src(succ, i);
         if (src && src->node == node) {
            src->type = ppir_target_pipeline;
            src->pipeline = ppir_pipeline_reg_sampler;
         }
      }
      return true;
   }

   /* Create move node as fallback */
   ppir_node *move = ppir_node_insert_mov(node);
   if (unlikely(!move))
      return false;

   ppir_debug("lower texture create move %d for %d\n",
              move->index, node->index);

   ppir_src *mov_src = ppir_node_get_src(move, 0);
   mov_src->type = dest->type = ppir_target_pipeline;
   mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_sampler;

   return true;
}

/* Lower a select: the condition (src[0]) must reach the select through the
 * ^fmul pipeline register, i.e. it has to be producible in the scalar mul
 * slot of the same instruction.  Either retarget the existing condition node
 * to ^fmul directly, or insert a scalar mov into ^fmul in front of the
 * select. */
static bool ppir_lower_select(ppir_block *block, ppir_node *node)
{
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_src *src0 = &alu->src[0];
   ppir_src *src1 = &alu->src[1];
   ppir_src *src2 = &alu->src[2];

   /* If the condition is already an alu scalar whose only successor
    * is the select node, just turn it into pipeline output. */
   /* The (src2->node == cond) case is a tricky exception.
    * The reason is that we must force cond to output to ^fmul -- but
    * then it no longer writes to a register and it is impossible to
    * reference ^fmul in src2. So in that exceptional case, also fall
    * back to the mov.
    */
   ppir_node *cond = src0->node;
   if (cond &&
       cond->type == ppir_node_type_alu &&
       ppir_node_has_single_succ(cond) &&
       ppir_target_is_scalar(ppir_node_get_dest(cond)) &&
       ppir_node_schedulable_slot(cond, PPIR_INSTR_SLOT_ALU_SCL_MUL) &&
       src2->node != cond) {

      ppir_dest *cond_dest = ppir_node_get_dest(cond);
      cond_dest->type = ppir_target_pipeline;
      cond_dest->pipeline = ppir_pipeline_reg_fmul;

      ppir_node_target_assign(src0, cond);

      /* src1 could also be a reference from the same node as
       * the condition, so update it in that case. */
      if (src1->node && src1->node == cond)
         ppir_node_target_assign(src1, cond);

      return true;
   }

   /* If the condition can't be used for any reason, insert a mov
    * so that the condition can end up in ^fmul */
   ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
   if (!move)
      return false;
   list_addtail(&move->list, &node->list);

   ppir_alu_node *move_alu = ppir_node_to_alu(move);
   ppir_src *move_src = move_alu->src;
   /* Copy only the fields of src0 the mov needs: target, value and the
    * first swizzle component (the condition is scalar). */
   move_src->type = src0->type;
   move_src->ssa = src0->ssa;
   move_src->swizzle[0] = src0->swizzle[0];
   move_alu->num_src = 1;

   ppir_dest *move_dest = &move_alu->dest;
   move_dest->type = ppir_target_pipeline;
   move_dest->pipeline = ppir_pipeline_reg_fmul;
   move_dest->write_mask = 1;

   /* Splice the mov into the dependency graph between the old condition
    * producer (if any) and the select. */
   ppir_node *pred = src0->node;
   ppir_dep *dep = ppir_dep_for_pred(node, pred);
   if (dep)
      ppir_node_replace_pred(dep, move);
   else
      ppir_node_add_dep(node, move, ppir_dep_src);

   /* pred can be a register */
   if (pred)
      ppir_node_add_dep(move, pred, ppir_dep_src);

   ppir_node_target_assign(src0, move);

   /* src1 could also be a reference from the same node as
    * the condition, so update it in that case.
    */
   if (src1->node && src1->node == pred)
      ppir_node_target_assign(src1, move);

   return true;
}

/* Lower trunc to a mov with the round-to-integer output modifier. */
static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
{
   /* Turn it into a mov with a round to integer output modifier */
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *move_dest = &alu->dest;
   move_dest->modifier = ppir_outmod_round;
   node->op = ppir_op_mov;

   return true;
}

/* Lower abs to a mov with the absolute source modifier set (and negate
 * cleared, since |x| makes any pending negate meaningless). */
static bool ppir_lower_abs(ppir_block *block, ppir_node *node)
{
   /* Turn it into a mov and set the absolute modifier */
   ppir_alu_node *alu = ppir_node_to_alu(node);

   assert(alu->num_src == 1);

   alu->src[0].absolute = true;
   alu->src[0].negate = false;
   node->op = ppir_op_mov;

   return true;
}

/* Lower neg to a mov with the source's negate modifier toggled. */
static bool ppir_lower_neg(ppir_block *block, ppir_node *node)
{
   /* Turn it into a mov and set the negate modifier */
   ppir_alu_node *alu = ppir_node_to_alu(node);

   assert(alu->num_src == 1);

   alu->src[0].negate = !alu->src[0].negate;
   node->op = ppir_op_mov;

   return true;
}

/* Lower sat to a mov with the clamp-to-[0,1] output modifier. */
static bool ppir_lower_sat(ppir_block *block, ppir_node *node)
{
   /* Turn it into a mov with the saturate output modifier */
   ppir_alu_node *alu = ppir_node_to_alu(node);

   assert(alu->num_src == 1);

   ppir_dest *move_dest = &alu->dest;
   move_dest->modifier = ppir_outmod_clamp_fraction;
   node->op = ppir_op_mov;

   return true;
}

/* Lower a conditional branch: materialize a const 0 in ^const0 as the second
 * source and set the compare flags so the branch is taken when the condition
 * is non-zero (or equal to zero when negated).
 *
 * Returns true on success, false on allocation failure.
 */
static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
{
   ppir_branch_node *branch = ppir_node_to_branch(node);

   /* Unconditional branch */
   if (branch->num_src == 0)
      return true;

   ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);

   if (!zero)
      return false;

   zero->constant.value[0].f = 0;
   zero->constant.num = 1;
   zero->dest.type = ppir_target_pipeline;
   zero->dest.pipeline = ppir_pipeline_reg_const0;
   zero->dest.ssa.num_components = 1;
   zero->dest.write_mask = 0x01;

   /* For now we're just comparing branch condition with 0,
    * in future we should look whether it's possible to move
    * comparison node into branch itself and use current
    * way as a fallback for complex conditions.
    */
   ppir_node_target_assign(&branch->src[1], &zero->node);

   if (branch->negate)
      branch->cond_eq = true;
   else {
      /* "cond != 0" expressed as "cond > 0 || cond < 0". */
      branch->cond_gt = true;
      branch->cond_lt = true;
   }

   branch->num_src = 2;

   ppir_node_add_dep(&branch->node, &zero->node, ppir_dep_src);
   list_addtail(&zero->node.list, &node->list);

   return true;
}

/* Reorder the sources of accumulator-style ops (add/min/max/eq/ne) so that
 * src[0] is one that can be produced in a MUL slot, enabling the scheduler
 * to pass it through a pipeline register later.  Always returns true (this
 * is only a best-effort reordering). */
static bool ppir_lower_accum(ppir_block *block, ppir_node *node)
{
   /* If the last argument of a node placed in PPIR_INSTR_SLOT_ALU_SCL_ADD
    * (or PPIR_INSTR_SLOT_ALU_VEC_ADD) is placed in
    * PPIR_INSTR_SLOT_ALU_SCL_MUL (or PPIR_INSTR_SLOT_ALU_VEC_MUL) we cannot
    * save a register (and an instruction) by using a pipeline register.
    * Therefore it is interesting to make sure arguments of that type are
    * the first argument by swapping arguments (if possible) */
   ppir_alu_node *alu = ppir_node_to_alu(node);

   assert(alu->num_src >= 2);

   /* src[0] already comes through a pipeline register — nothing to do. */
   if (alu->src[0].type == ppir_target_pipeline)
      return true;

   /* src[0] already schedulable in a MUL slot — keep the current order. */
   if (alu->src[0].type == ppir_target_ssa) {
      int *src_0_slots = ppir_op_infos[alu->src[0].node->op].slots;
      if (src_0_slots) {
         for (int i = 0; src_0_slots[i] != PPIR_INSTR_SLOT_END; i++) {
            if ((src_0_slots[i] == PPIR_INSTR_SLOT_ALU_SCL_MUL) ||
                (src_0_slots[i] == PPIR_INSTR_SLOT_ALU_VEC_MUL)) {
               return true;
            }
         }
      }
   }

   /* Look for a later SSA source whose producer fits a MUL slot. */
   int src_to_swap = -1;
   for (int j = 1; j < alu->num_src; j++) {
      if (alu->src[j].type != ppir_target_ssa)
         continue;
      int *src_slots = ppir_op_infos[alu->src[j].node->op].slots;
      if (!src_slots)
         continue;
      for (int i = 0; src_slots[i] != PPIR_INSTR_SLOT_END; i++) {
         if ((src_slots[i] == PPIR_INSTR_SLOT_ALU_SCL_MUL) ||
             (src_slots[i] == PPIR_INSTR_SLOT_ALU_VEC_MUL)) {
            src_to_swap = j;
            break;
         }
      }
      if (src_to_swap > 0)
         break;
   }

   if (src_to_swap < 0)
      return true;

   /* Swap arguments so that we can use a pipeline register later on */
   ppir_src tmp = alu->src[0];
   alu->src[0] = alu->src[src_to_swap];
   alu->src[src_to_swap] = tmp;

   return true;
}

/* Per-op lowering dispatch table, indexed by ppir_op.  Ops without an entry
 * need no lowering.  Note lt/le map to ppir_lower_swap_args, which relies on
 * the swapped op (gt/ge presumably) directly following in the enum. */
static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
   [ppir_op_abs] = ppir_lower_abs,
   [ppir_op_neg] = ppir_lower_neg,
   [ppir_op_const] = ppir_lower_const,
   [ppir_op_ddx] = ppir_lower_ddxy,
   [ppir_op_ddy] = ppir_lower_ddxy,
   [ppir_op_lt] = ppir_lower_swap_args,
   [ppir_op_le] = ppir_lower_swap_args,
   [ppir_op_load_texture] = ppir_lower_texture,
   [ppir_op_select] = ppir_lower_select,
   [ppir_op_trunc] = ppir_lower_trunc,
   [ppir_op_sat] = ppir_lower_sat,
   [ppir_op_branch] = ppir_lower_branch,
   [ppir_op_load_uniform] = ppir_lower_load,
   [ppir_op_load_temp] = ppir_lower_load,
   [ppir_op_add] = ppir_lower_accum,
   [ppir_op_max] = ppir_lower_accum,
   [ppir_op_min] = ppir_lower_accum,
   [ppir_op_eq] = ppir_lower_accum,
   [ppir_op_ne] = ppir_lower_accum,
};

/* Run the lowering pass over every node of every block in the program.
 * Uses the _safe list iterator because lowerings may delete or insert
 * nodes.  Returns false as soon as any lowering fails (allocation
 * failure), true otherwise. */
bool ppir_lower_prog(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
         if (ppir_lower_funcs[node->op] &&
             !ppir_lower_funcs[node->op](block, node))
            return false;
      }
   }

   return true;
}