1/* 2 * Copyright (c) 2017 Lima Project 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sub license, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the 12 * next paragraph) shall be included in all copies or substantial portions 13 * of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25#include "util/ralloc.h" 26#include "util/u_half.h" 27#include "util/bitscan.h" 28 29#include "ppir.h" 30#include "codegen.h" 31#include "lima_context.h" 32 33static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift) 34{ 35 unsigned ret = 0; 36 for (int i = 0; i < 4; i++) 37 ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2); 38 return ret; 39} 40 41static int get_scl_reg_index(ppir_src *src, int component) 42{ 43 int ret = ppir_target_get_src_reg_index(src); 44 ret += src->swizzle[component]; 45 return ret; 46} 47 48static void ppir_codegen_encode_varying(ppir_node *node, void *code) 49{ 50 ppir_codegen_field_varying *f = code; 51 ppir_load_node *load = ppir_node_to_load(node); 52 ppir_dest *dest = &load->dest; 53 int index = ppir_target_get_dest_reg_index(dest); 54 int num_components = load->num_components; 55 56 if (num_components) { 57 assert(node->op == ppir_op_load_varying || 58 node->op == ppir_op_load_coords || 59 node->op == ppir_op_load_fragcoord); 60 61 f->imm.dest = index >> 2; 62 f->imm.mask = dest->write_mask << (index & 0x3); 63 64 int alignment = num_components == 3 ? 3 : num_components - 1; 65 f->imm.alignment = alignment; 66 f->imm.offset_vector = 0xf; 67 68 if (alignment == 3) 69 f->imm.index = load->index >> 2; 70 else 71 f->imm.index = load->index >> alignment; 72 73 if (node->op == ppir_op_load_fragcoord) { 74 f->imm.source_type = 2; 75 f->imm.perspective = 3; 76 } 77 } 78 else { 79 assert(node->op == ppir_op_load_coords); 80 81 f->reg.dest = index >> 2; 82 f->reg.mask = dest->write_mask << (index & 0x3); 83 84 f->reg.source_type = 1; 85 86 ppir_src *src = &load->src; 87 index = ppir_target_get_src_reg_index(src); 88 f->reg.source = index >> 2; 89 f->reg.negate = src->negate; 90 f->reg.absolute = src->absolute; 91 f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0); 92 } 93} 94 95static void ppir_codegen_encode_texld(ppir_node *node, void *code) 96{ 97 ppir_codegen_field_sampler *f = code; 98 ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node); 99 100 f->index = ldtex->sampler; 101 f->lod_bias_en = 0; 102 f->type = ppir_codegen_sampler_type_2d; 103 f->offset_en = 0; 104 f->unknown_2 = 0x39001; 105} 106 107static void ppir_codegen_encode_uniform(ppir_node *node, void *code) 108{ 109 ppir_codegen_field_uniform *f = code; 110 ppir_load_node *load = ppir_node_to_load(node); 111 112 switch (node->op) { 113 case ppir_op_load_uniform: 114 f->source = ppir_codegen_uniform_src_uniform; 115 break; 116 case ppir_op_load_temp: 117 f->source = ppir_codegen_uniform_src_temporary; 118 break; 119 default: 120 assert(0); 121 } 122 123 int num_components = load->num_components; 124 int alignment = num_components == 4 ? 2 : num_components - 1; 125 126 f->alignment = alignment; 127 128 /* TODO: uniform can be also combined like varying */ 129 f->index = load->index << (2 - alignment); 130} 131 132static unsigned shift_to_op(int shift) 133{ 134 assert(shift >= -3 && shift <= 3); 135 return shift < 0 ? shift + 8 : shift; 136} 137 138static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code) 139{ 140 ppir_codegen_field_vec4_mul *f = code; 141 ppir_alu_node *alu = ppir_node_to_alu(node); 142 143 ppir_dest *dest = &alu->dest; 144 int dest_shift = 0; 145 if (dest->type != ppir_target_pipeline) { 146 int index = ppir_target_get_dest_reg_index(dest); 147 dest_shift = index & 0x3; 148 f->dest = index >> 2; 149 f->mask = dest->write_mask << dest_shift; 150 } 151 f->dest_modifier = dest->modifier; 152 153 switch (node->op) { 154 case ppir_op_mul: 155 f->op = shift_to_op(alu->shift); 156 break; 157 case ppir_op_mov: 158 f->op = ppir_codegen_vec4_mul_op_mov; 159 break; 160 case ppir_op_max: 161 f->op = ppir_codegen_vec4_mul_op_max; 162 break; 163 case ppir_op_min: 164 f->op = ppir_codegen_vec4_mul_op_min; 165 break; 166 case ppir_op_and: 167 f->op = ppir_codegen_vec4_mul_op_and; 168 break; 169 case ppir_op_or: 170 f->op = ppir_codegen_vec4_mul_op_or; 171 break; 172 case ppir_op_xor: 173 f->op = ppir_codegen_vec4_mul_op_xor; 174 break; 175 case ppir_op_gt: 176 f->op = ppir_codegen_vec4_mul_op_gt; 177 break; 178 case ppir_op_ge: 179 f->op = ppir_codegen_vec4_mul_op_ge; 180 break; 181 case ppir_op_eq: 182 f->op = ppir_codegen_vec4_mul_op_eq; 183 break; 184 case ppir_op_ne: 185 f->op = ppir_codegen_vec4_mul_op_ne; 186 break; 187 case ppir_op_not: 188 f->op = ppir_codegen_vec4_mul_op_not; 189 break; 190 default: 191 break; 192 } 193 194 ppir_src *src = alu->src; 195 int index = ppir_target_get_src_reg_index(src); 196 f->arg0_source = index >> 2; 197 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); 198 f->arg0_absolute = src->absolute; 199 f->arg0_negate = src->negate; 200 201 if (alu->num_src == 2) { 202 src = alu->src + 1; 203 index = ppir_target_get_src_reg_index(src); 204 f->arg1_source = index >> 2; 205 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); 206 f->arg1_absolute = src->absolute; 207 f->arg1_negate = src->negate; 208 } 209} 210 211static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code) 212{ 213 ppir_codegen_field_float_mul *f = code; 214 ppir_alu_node *alu = ppir_node_to_alu(node); 215 216 ppir_dest *dest = &alu->dest; 217 int dest_component = ffs(dest->write_mask) - 1; 218 assert(dest_component >= 0); 219 220 if (dest->type != ppir_target_pipeline) { 221 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component; 222 f->output_en = true; 223 } 224 f->dest_modifier = dest->modifier; 225 226 switch (node->op) { 227 case ppir_op_mul: 228 f->op = shift_to_op(alu->shift); 229 break; 230 case ppir_op_mov: 231 f->op = ppir_codegen_float_mul_op_mov; 232 break; 233 case ppir_op_max: 234 f->op = ppir_codegen_float_mul_op_max; 235 break; 236 case ppir_op_min: 237 f->op = ppir_codegen_float_mul_op_min; 238 break; 239 case ppir_op_and: 240 f->op = ppir_codegen_float_mul_op_and; 241 break; 242 case ppir_op_or: 243 f->op = ppir_codegen_float_mul_op_or; 244 break; 245 case ppir_op_xor: 246 f->op = ppir_codegen_float_mul_op_xor; 247 break; 248 case ppir_op_gt: 249 f->op = ppir_codegen_float_mul_op_gt; 250 break; 251 case ppir_op_ge: 252 f->op = ppir_codegen_float_mul_op_ge; 253 break; 254 case ppir_op_eq: 255 f->op = ppir_codegen_float_mul_op_eq; 256 break; 257 case ppir_op_ne: 258 f->op = ppir_codegen_float_mul_op_ne; 259 break; 260 case ppir_op_not: 261 f->op = ppir_codegen_float_mul_op_not; 262 break; 263 default: 264 break; 265 } 266 267 ppir_src *src = alu->src; 268 f->arg0_source = get_scl_reg_index(src, dest_component); 269 f->arg0_absolute = src->absolute; 270 f->arg0_negate = src->negate; 271 272 if (alu->num_src == 2) { 273 src = alu->src + 1; 274 f->arg1_source = get_scl_reg_index(src, dest_component); 275 f->arg1_absolute = src->absolute; 276 f->arg1_negate = src->negate; 277 } 278} 279 280static void ppir_codegen_encode_vec_add(ppir_node *node, void *code) 281{ 282 ppir_codegen_field_vec4_acc *f = code; 283 ppir_alu_node *alu = ppir_node_to_alu(node); 284 285 ppir_dest *dest = &alu->dest; 286 int index = ppir_target_get_dest_reg_index(dest); 287 int dest_shift = index & 0x3; 288 f->dest = index >> 2; 289 f->mask = dest->write_mask << dest_shift; 290 f->dest_modifier = dest->modifier; 291 292 switch (node->op) { 293 case ppir_op_add: 294 f->op = ppir_codegen_vec4_acc_op_add; 295 break; 296 case ppir_op_mov: 297 f->op = ppir_codegen_vec4_acc_op_mov; 298 break; 299 case ppir_op_sum3: 300 f->op = ppir_codegen_vec4_acc_op_sum3; 301 dest_shift = 0; 302 break; 303 case ppir_op_sum4: 304 f->op = ppir_codegen_vec4_acc_op_sum4; 305 dest_shift = 0; 306 break; 307 case ppir_op_floor: 308 f->op = ppir_codegen_vec4_acc_op_floor; 309 break; 310 case ppir_op_ceil: 311 f->op = ppir_codegen_vec4_acc_op_ceil; 312 break; 313 case ppir_op_fract: 314 f->op = ppir_codegen_vec4_acc_op_fract; 315 break; 316 case ppir_op_gt: 317 f->op = ppir_codegen_vec4_acc_op_gt; 318 break; 319 case ppir_op_ge: 320 f->op = ppir_codegen_vec4_acc_op_ge; 321 break; 322 case ppir_op_eq: 323 f->op = ppir_codegen_vec4_acc_op_eq; 324 break; 325 case ppir_op_ne: 326 f->op = ppir_codegen_vec4_acc_op_ne; 327 break; 328 case ppir_op_select: 329 f->op = ppir_codegen_vec4_acc_op_sel; 330 break; 331 default: 332 break; 333 } 334 335 ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src; 336 index = ppir_target_get_src_reg_index(src); 337 338 if (src->type == ppir_target_pipeline && 339 src->pipeline == ppir_pipeline_reg_vmul) 340 f->mul_in = true; 341 else 342 f->arg0_source = index >> 2; 343 344 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); 345 f->arg0_absolute = src->absolute; 346 f->arg0_negate = src->negate; 347 348 if (++src < alu->src + alu->num_src) { 349 index = ppir_target_get_src_reg_index(src); 350 f->arg1_source = index >> 2; 351 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); 352 f->arg1_absolute = src->absolute; 353 f->arg1_negate = src->negate; 354 } 355} 356 357static void ppir_codegen_encode_scl_add(ppir_node *node, void *code) 358{ 359 ppir_codegen_field_float_acc *f = code; 360 ppir_alu_node *alu = ppir_node_to_alu(node); 361 362 ppir_dest *dest = &alu->dest; 363 int dest_component = ffs(dest->write_mask) - 1; 364 assert(dest_component >= 0); 365 366 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component; 367 f->output_en = true; 368 f->dest_modifier = dest->modifier; 369 370 switch (node->op) { 371 case ppir_op_add: 372 f->op = shift_to_op(alu->shift); 373 break; 374 case ppir_op_mov: 375 f->op = ppir_codegen_float_acc_op_mov; 376 break; 377 case ppir_op_max: 378 f->op = ppir_codegen_float_acc_op_max; 379 break; 380 case ppir_op_min: 381 f->op = ppir_codegen_float_acc_op_min; 382 break; 383 case ppir_op_floor: 384 f->op = ppir_codegen_float_acc_op_floor; 385 break; 386 case ppir_op_ceil: 387 f->op = ppir_codegen_float_acc_op_ceil; 388 break; 389 case ppir_op_fract: 390 f->op = ppir_codegen_float_acc_op_fract; 391 break; 392 case ppir_op_gt: 393 f->op = ppir_codegen_float_acc_op_gt; 394 break; 395 case ppir_op_ge: 396 f->op = ppir_codegen_float_acc_op_ge; 397 break; 398 case ppir_op_eq: 399 f->op = ppir_codegen_float_acc_op_eq; 400 break; 401 case ppir_op_ne: 402 f->op = ppir_codegen_float_acc_op_ne; 403 break; 404 case ppir_op_select: 405 f->op = ppir_codegen_float_acc_op_sel; 406 break; 407 default: 408 break; 409 } 410 411 ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src; 412 if (src->type == ppir_target_pipeline && 413 src->pipeline == ppir_pipeline_reg_fmul) 414 f->mul_in = true; 415 else 416 f->arg0_source = get_scl_reg_index(src, dest_component); 417 f->arg0_absolute = src->absolute; 418 f->arg0_negate = src->negate; 419 420 if (++src < alu->src + alu->num_src) { 421 f->arg1_source = get_scl_reg_index(src, dest_component); 422 f->arg1_absolute = src->absolute; 423 f->arg1_negate = src->negate; 424 } 425} 426 427static void ppir_codegen_encode_combine(ppir_node *node, void *code) 428{ 429 ppir_codegen_field_combine *f = code; 430 ppir_alu_node *alu = ppir_node_to_alu(node); 431 432 switch (node->op) { 433 case ppir_op_rsqrt: 434 case ppir_op_log2: 435 case ppir_op_exp2: 436 case ppir_op_rcp: 437 case ppir_op_sqrt: 438 case ppir_op_sin: 439 case ppir_op_cos: 440 { 441 f->scalar.dest_vec = false; 442 f->scalar.arg1_en = false; 443 444 ppir_dest *dest = &alu->dest; 445 int dest_component = ffs(dest->write_mask) - 1; 446 assert(dest_component >= 0); 447 f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component; 448 f->scalar.dest_modifier = dest->modifier; 449 450 ppir_src *src = alu->src; 451 f->scalar.arg0_src = get_scl_reg_index(src, dest_component); 452 f->scalar.arg0_absolute = src->absolute; 453 f->scalar.arg0_negate = src->negate; 454 455 switch (node->op) { 456 case ppir_op_rsqrt: 457 f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt; 458 break; 459 case ppir_op_log2: 460 f->scalar.op = ppir_codegen_combine_scalar_op_log2; 461 break; 462 case ppir_op_exp2: 463 f->scalar.op = ppir_codegen_combine_scalar_op_exp2; 464 break; 465 case ppir_op_rcp: 466 f->scalar.op = ppir_codegen_combine_scalar_op_rcp; 467 break; 468 case ppir_op_sqrt: 469 f->scalar.op = ppir_codegen_combine_scalar_op_sqrt; 470 break; 471 case ppir_op_sin: 472 f->scalar.op = ppir_codegen_combine_scalar_op_sin; 473 break; 474 case ppir_op_cos: 475 f->scalar.op = ppir_codegen_combine_scalar_op_cos; 476 break; 477 default: 478 break; 479 } 480 } 481 default: 482 break; 483 } 484} 485 486static void ppir_codegen_encode_store_temp(ppir_node *node, void *code) 487{ 488 assert(node->op == ppir_op_store_temp); 489 490 ppir_codegen_field_temp_write *f = code; 491 ppir_store_node *snode = ppir_node_to_store(node); 492 int num_components = snode->num_components; 493 494 f->temp_write.dest = 0x03; // 11 - temporary 495 f->temp_write.source = snode->src.reg->index; 496 497 int alignment = num_components == 4 ? 2 : num_components - 1; 498 f->temp_write.alignment = alignment; 499 f->temp_write.index = snode->index << (2 - alignment); 500 501 f->temp_write.offset_reg = snode->index >> 2; 502} 503 504static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code) 505{ 506 for (int i = 0; i < constant->num; i++) 507 code[i] = util_float_to_half(constant->value[i].f); 508} 509 510typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *); 511 512static const ppir_codegen_instr_slot_encode_func 513ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = { 514 [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying, 515 [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld, 516 [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform, 517 [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul, 518 [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul, 519 [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add, 520 [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add, 521 [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine, 522 [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp, 523}; 524 525static const int ppir_codegen_field_size[] = { 526 34, 62, 41, 43, 30, 44, 31, 30, 41, 73 527}; 528 529static inline int align_to_word(int size) 530{ 531 return ((size + 0x1f) >> 5); 532} 533 534static int get_instr_encode_size(ppir_instr *instr) 535{ 536 int size = 0; 537 538 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { 539 if (instr->slots[i]) 540 size += ppir_codegen_field_size[i]; 541 } 542 543 for (int i = 0; i < 2; i++) { 544 if (instr->constant[i].num) 545 size += 64; 546 } 547 548 return align_to_word(size) + 1; 549} 550 551static void bitcopy(void *dst, int dst_offset, void *src, int src_size) 552{ 553 int off1 = dst_offset & 0x1f; 554 uint32_t *cpy_dst = dst, *cpy_src = src; 555 556 cpy_dst += (dst_offset >> 5); 557 558 if (off1) { 559 int off2 = 32 - off1; 560 int cpy_size = 0; 561 while (1) { 562 *cpy_dst |= *cpy_src << off1; 563 cpy_dst++; 564 565 cpy_size += off2; 566 if (cpy_size >= src_size) 567 break; 568 569 *cpy_dst |= *cpy_src >> off2; 570 cpy_src++; 571 572 cpy_size += off1; 573 if (cpy_size >= src_size) 574 break; 575 } 576 } 577 else 578 memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4); 579} 580 581static int encode_instr(ppir_instr *instr, void *code, void *last_code) 582{ 583 int size = 0; 584 ppir_codegen_ctrl *ctrl = code; 585 586 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { 587 if (instr->slots[i]) { 588 /* max field size (73), align to dword */ 589 uint8_t output[12] = {0}; 590 591 ppir_codegen_encode_slot[i](instr->slots[i], output); 592 bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]); 593 594 size += ppir_codegen_field_size[i]; 595 ctrl->fields |= 1 << i; 596 } 597 } 598 599 if (instr->slots[PPIR_INSTR_SLOT_TEXLD]) 600 ctrl->sync = true; 601 602 for (int i = 0; i < 2; i++) { 603 if (instr->constant[i].num) { 604 uint16_t output[4] = {0}; 605 606 ppir_codegen_encode_const(instr->constant + i, output); 607 bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16); 608 609 size += 64; 610 ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i); 611 } 612 } 613 614 size = align_to_word(size) + 1; 615 616 ctrl->count = size; 617 if (instr->is_end) 618 ctrl->stop = true; 619 620 if (last_code) { 621 ppir_codegen_ctrl *last_ctrl = last_code; 622 last_ctrl->next_count = size; 623 last_ctrl->prefetch = true; 624 } 625 626 return size; 627} 628 629static void ppir_codegen_print_prog(ppir_compiler *comp) 630{ 631 uint32_t *prog = comp->prog->shader; 632 unsigned offset = 0; 633 634 printf("========ppir codegen========\n"); 635 list_for_each_entry(ppir_block, block, &comp->block_list, list) { 636 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { 637 printf("%03d: ", instr->index); 638 int n = prog[0] & 0x1f; 639 for (int i = 0; i < n; i++) { 640 if (i && i % 6 == 0) 641 printf("\n "); 642 printf("%08x ", prog[i]); 643 } 644 printf("\n"); 645 ppir_disassemble_instr(prog, offset); 646 prog += n; 647 offset += n; 648 } 649 } 650 printf("-----------------------\n"); 651} 652 653bool ppir_codegen_prog(ppir_compiler *comp) 654{ 655 int size = 0; 656 list_for_each_entry(ppir_block, block, &comp->block_list, list) { 657 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { 658 size += get_instr_encode_size(instr); 659 } 660 } 661 662 uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t)); 663 if (!prog) 664 return false; 665 666 uint32_t *code = prog, *last_code = NULL; 667 list_for_each_entry(ppir_block, block, &comp->block_list, list) { 668 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { 669 int offset = encode_instr(instr, code, last_code); 670 last_code = code; 671 code += offset; 672 } 673 } 674 675 comp->prog->shader = prog; 676 comp->prog->shader_size = size * sizeof(uint32_t); 677 678 if (lima_debug & LIMA_DEBUG_PP) 679 ppir_codegen_print_prog(comp); 680 681 return true; 682} 683