/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "util/ralloc.h"
#include "util/half_float.h"
#include "util/bitscan.h"

#include "ppir.h"
#include "codegen.h"
#include "lima_context.h"

/* Pack a 4-component swizzle into 2 bits per component.
 * Each component is offset by `shift` (wrapped to 0-3) and the whole
 * field is placed starting at component position `dest_shift`.
 * `shift` compensates for a source register whose base is not
 * vec4-aligned; `dest_shift` for a dest that is not vec4-aligned. */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned ret = 0;
   for (int i = 0; i < 4; i++)
      ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);
   return ret;
}

/* Scalar-slot source index: base register index plus the swizzled
 * component, giving the absolute scalar register the slot reads. */
static int get_scl_reg_index(ppir_src *src, int component)
{
   int ret = ppir_target_get_src_reg_index(src);
   ret += src->swizzle[component];
   return ret;
}

/* Encode the varying-fetch slot.  Handles two encodings: the immediate
 * form (load_varying/load_coords/fragcoord/pointcoord/frontface) and
 * the register form (load_coords_reg, coords taken from a register). */
static void ppir_codegen_encode_varying(ppir_node *node, void *code)
{
   ppir_codegen_field_varying *f = code;
   ppir_load_node *load = ppir_node_to_load(node);
   ppir_dest *dest = &load->dest;
   int index = ppir_target_get_dest_reg_index(dest);
   int num_components = load->num_components;

   if (node->op != ppir_op_load_coords_reg) {
      assert(node->op == ppir_op_load_varying ||
             node->op == ppir_op_load_coords ||
             node->op == ppir_op_load_fragcoord ||
             node->op == ppir_op_load_pointcoord ||
             node->op == ppir_op_load_frontface);

      /* dest register index is in vec4 units; low 2 bits shift the mask */
      f->imm.dest = index >> 2;
      f->imm.mask = dest->write_mask << (index & 0x3);

      /* alignment encodes component count: 0=1, 1=2, 3=3 or 4 components
       * (NOTE(review): 3-component uses code 3, presumably padded to
       * vec4 in the varying layout — matches the index shift below) */
      int alignment = num_components == 3 ? 3 : num_components - 1;
      f->imm.alignment = alignment;

      if (load->num_src) {
         index = ppir_target_get_src_reg_index(&load->src);
         f->imm.offset_vector = index >> 2;
         f->imm.offset_scalar = index & 0x3;
      } else
         f->imm.offset_vector = 0xf; /* no offset register */

      /* varying index is expressed in units of the alignment */
      if (alignment == 3)
         f->imm.index = load->index >> 2;
      else
         f->imm.index = load->index >> alignment;

      switch (node->op) {
      case ppir_op_load_fragcoord:
         f->imm.source_type = 2;
         f->imm.perspective = 3;
         break;
      case ppir_op_load_pointcoord:
         f->imm.source_type = 3;
         break;
      case ppir_op_load_frontface:
         f->imm.source_type = 3;
         f->imm.perspective = 1;
         break;
      case ppir_op_load_coords:
         /* num_components == 3 implies cubemap as we don't support 3D textures */
         f->imm.source_type = num_components == 3 ? 2 : 0;
         break;
      default:
         break;
      }
   }
   else { /* node->op == ppir_op_load_coords_reg */
      f->reg.dest = index >> 2;
      f->reg.mask = dest->write_mask << (index & 0x3);

      if (load->num_src) {
         /* num_components == 3 implies cubemap as we don't support 3D textures */
         if (num_components == 3) {
            f->reg.source_type = 2;
            f->reg.perspective = 1;
         } else {
            f->reg.source_type = 1;
         }
         ppir_src *src = &load->src;
         index = ppir_target_get_src_reg_index(src);
         f->reg.source = index >> 2;
         f->reg.negate = src->negate;
         f->reg.absolute = src->absolute;
         f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
      }
   }
}

/* Encode the texture-load (sampler) slot: sampler index, optional
 * LOD bias source register, and the sampler dimensionality. */
static void ppir_codegen_encode_texld(ppir_node *node, void *code)
{
   ppir_codegen_field_sampler *f = code;
   ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);

   f->index = ldtex->sampler;

   f->lod_bias_en = ldtex->lod_bias_en;
   f->explicit_lod = ldtex->explicit_lod;
   if (ldtex->lod_bias_en)
      f->lod_bias = ppir_target_get_src_reg_index(&ldtex->src[1]);

   switch (ldtex->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      f->type = ppir_codegen_sampler_type_2d;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      f->type = ppir_codegen_sampler_type_cube;
      break;
   default:
      break;
   }

   f->offset_en = 0;
   /* NOTE(review): magic control bits from reverse engineering —
    * meaning unknown, keep as-is */
   f->unknown_2 = 0x39001;
}

/* Encode the uniform/temporary-load slot. */
static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
{
   ppir_codegen_field_uniform *f = code;
   ppir_load_node *load = ppir_node_to_load(node);

   switch (node->op) {
   case ppir_op_load_uniform:
      f->source = ppir_codegen_uniform_src_uniform;
      break;
   case ppir_op_load_temp:
      f->source = ppir_codegen_uniform_src_temporary;
      break;
   default:
      assert(0);
   }

   /* Uniforms are always aligned to vec4 boundary */
   f->alignment = 2;
   f->index = load->index;

   if (load->num_src) {
      /* indirect access: add the value of a scalar register */
      f->offset_en = 1;
      f->offset_reg = ppir_target_get_src_reg_index(&load->src);
   }
}

/* Map a multiply result shift (-3..3) onto the hardware op code:
 * non-negative shifts encode directly, negative shifts wrap into the
 * 5..7 range (two's-complement in a 3-bit field). */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);
   return shift < 0 ? shift + 8 : shift;
}

/* Encode the vec4 multiply ALU slot. */
static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int dest_shift = 0;
   /* pipeline dests (feeding the add stage directly) have no register */
   if (dest->type != ppir_target_pipeline) {
      int index = ppir_target_get_dest_reg_index(dest);
      dest_shift = index & 0x3;
      f->dest = index >> 2;
      f->mask = dest->write_mask << dest_shift;
   }
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_mul:
      /* mul ops 0..7 double as the result shift amount */
      f->op = shift_to_op(alu->shift);
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_vec4_mul_op_mov;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_vec4_mul_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_vec4_mul_op_min;
      break;
   case ppir_op_and:
      f->op = ppir_codegen_vec4_mul_op_and;
      break;
   case ppir_op_or:
      f->op = ppir_codegen_vec4_mul_op_or;
      break;
   case ppir_op_xor:
      f->op = ppir_codegen_vec4_mul_op_xor;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_vec4_mul_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_vec4_mul_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_vec4_mul_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_vec4_mul_op_ne;
      break;
   case ppir_op_not:
      f->op = ppir_codegen_vec4_mul_op_not;
      break;
   default:
      break;
   }

   ppir_src *src = alu->src;
   int index = ppir_target_get_src_reg_index(src);
   f->arg0_source = index >> 2;
   /* swizzle rotated by the source misalignment and the dest shift */
   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   if (alu->num_src == 2) {
      src = alu->src + 1;
      index = ppir_target_get_src_reg_index(src);
      f->arg1_source = index >> 2;
      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}

/* Encode the scalar multiply ALU slot.  The dest component is the
 * lowest bit set in the write mask; sources address scalar registers. */
static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_float_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int dest_component = ffs(dest->write_mask) - 1;
   assert(dest_component >= 0);

   if (dest->type != ppir_target_pipeline) {
      f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
      f->output_en = true;
   }
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_mul:
      f->op = shift_to_op(alu->shift);
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_float_mul_op_mov;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_float_mul_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_float_mul_op_min;
      break;
   case ppir_op_and:
      f->op = ppir_codegen_float_mul_op_and;
      break;
   case ppir_op_or:
      f->op = ppir_codegen_float_mul_op_or;
      break;
   case ppir_op_xor:
      f->op = ppir_codegen_float_mul_op_xor;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_float_mul_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_float_mul_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_float_mul_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_float_mul_op_ne;
      break;
   case ppir_op_not:
      f->op = ppir_codegen_float_mul_op_not;
      break;
   default:
      break;
   }

   ppir_src *src = alu->src;
   f->arg0_source = get_scl_reg_index(src, dest_component);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   if (alu->num_src == 2) {
      src = alu->src + 1;
      f->arg1_source = get_scl_reg_index(src, dest_component);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}

/* Encode the vec4 add/accumulate ALU slot. */
static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int index = ppir_target_get_dest_reg_index(dest);
   int dest_shift = index & 0x3;
   f->dest = index >> 2;
   f->mask = dest->write_mask << dest_shift;
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_add:
      f->op = ppir_codegen_vec4_acc_op_add;
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_vec4_acc_op_mov;
      break;
   case ppir_op_sum3:
      f->op = ppir_codegen_vec4_acc_op_sum3;
      /* sum produces a scalar; don't rotate the source swizzle */
      dest_shift = 0;
      break;
   case ppir_op_sum4:
      f->op = ppir_codegen_vec4_acc_op_sum4;
      dest_shift = 0;
      break;
   case ppir_op_floor:
      f->op = ppir_codegen_vec4_acc_op_floor;
      break;
   case ppir_op_ceil:
      f->op = ppir_codegen_vec4_acc_op_ceil;
      break;
   case ppir_op_fract:
      f->op = ppir_codegen_vec4_acc_op_fract;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_vec4_acc_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_vec4_acc_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_vec4_acc_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_vec4_acc_op_ne;
      break;
   case ppir_op_select:
      f->op = ppir_codegen_vec4_acc_op_sel;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_vec4_acc_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_vec4_acc_op_min;
      break;
   case ppir_op_ddx:
      f->op = ppir_codegen_vec4_acc_op_dFdx;
      break;
   case ppir_op_ddy:
      f->op = ppir_codegen_vec4_acc_op_dFdy;
      break;
   default:
      break;
   }

   /* select's condition (src[0]) rides the mul stage; arg0 here is
    * src[1] and arg1 is src[2] */
   ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
   index = ppir_target_get_src_reg_index(src);

   if (src->type == ppir_target_pipeline &&
       src->pipeline == ppir_pipeline_reg_vmul)
      f->mul_in = true; /* take arg0 from the vec4 mul stage output */
   else
      f->arg0_source = index >> 2;

   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   if (++src < alu->src + alu->num_src) {
      index = ppir_target_get_src_reg_index(src);
      f->arg1_source = index >> 2;
      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}

/* Encode the scalar add/accumulate ALU slot. */
static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
{
   ppir_codegen_field_float_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int dest_component = ffs(dest->write_mask) - 1;
   assert(dest_component >= 0);

   f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
   f->output_en = true;
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_add:
      /* add ops 0..7 double as the result shift amount */
      f->op = shift_to_op(alu->shift);
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_float_acc_op_mov;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_float_acc_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_float_acc_op_min;
      break;
   case ppir_op_floor:
      f->op = ppir_codegen_float_acc_op_floor;
      break;
   case ppir_op_ceil:
      f->op = ppir_codegen_float_acc_op_ceil;
      break;
   case ppir_op_fract:
      f->op = ppir_codegen_float_acc_op_fract;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_float_acc_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_float_acc_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_float_acc_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_float_acc_op_ne;
      break;
   case ppir_op_select:
      f->op = ppir_codegen_float_acc_op_sel;
      break;
   case ppir_op_ddx:
      f->op = ppir_codegen_float_acc_op_dFdx;
      break;
   case ppir_op_ddy:
      f->op = ppir_codegen_float_acc_op_dFdy;
      break;
   default:
      break;
   }

   /* select's condition (src[0]) rides the mul stage; see vec_add */
   ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
   if (src->type == ppir_target_pipeline &&
       src->pipeline == ppir_pipeline_reg_fmul)
      f->mul_in = true; /* take arg0 from the scalar mul stage output */
   else
      f->arg0_source = get_scl_reg_index(src, dest_component);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   if (++src < alu->src + alu->num_src) {
      f->arg1_source = get_scl_reg_index(src, dest_component);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}

/* Encode the combine slot.  Only the scalar transcendental ops are
 * handled here (rsqrt/log2/exp2/rcp/sqrt/sin/cos); others fall through. */
static void ppir_codegen_encode_combine(ppir_node *node, void *code)
{
   ppir_codegen_field_combine *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   switch (node->op) {
   case ppir_op_rsqrt:
   case ppir_op_log2:
   case ppir_op_exp2:
   case ppir_op_rcp:
   case ppir_op_sqrt:
   case ppir_op_sin:
   case ppir_op_cos:
   {
      f->scalar.dest_vec = false;
      f->scalar.arg1_en = false;

      ppir_dest *dest = &alu->dest;
      int dest_component = ffs(dest->write_mask) - 1;
      assert(dest_component >= 0);
      f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
      f->scalar.dest_modifier = dest->modifier;

      ppir_src *src = alu->src;
      f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
      f->scalar.arg0_absolute = src->absolute;
      f->scalar.arg0_negate = src->negate;

      switch (node->op) {
      case ppir_op_rsqrt:
         f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
         break;
      case ppir_op_log2:
         f->scalar.op = ppir_codegen_combine_scalar_op_log2;
         break;
      case ppir_op_exp2:
         f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
         break;
      case ppir_op_rcp:
         f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
         break;
      case ppir_op_sqrt:
         f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
         break;
      case ppir_op_sin:
         f->scalar.op = ppir_codegen_combine_scalar_op_sin;
         break;
      case ppir_op_cos:
         f->scalar.op = ppir_codegen_combine_scalar_op_cos;
         break;
      default:
         break;
      }
      break;
   }
   default:
      break;
   }
}

/* Encode the temp-write slot: spill a register to temporary memory. */
static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_store_temp);

   ppir_codegen_field_temp_write *f = code;
   ppir_store_node *snode = ppir_node_to_store(node);
   int num_components = snode->num_components;

   f->temp_write.dest = 0x03; // 11 - temporary
   f->temp_write.source = snode->src.reg->index;

   /* alignment encodes component count: 0=1, 1=2, 2=4
    * (NOTE(review): 3-component stores presumably never reach here;
    * unlike the varying path there is no num_components==3 case) */
   int alignment = num_components == 4 ? 2 : num_components - 1;
   f->temp_write.alignment = alignment;
   f->temp_write.index = snode->index << (2 - alignment);

   f->temp_write.offset_reg = snode->index >> 2;
}

/* Pack an embedded constant vector as fp16 halves. */
static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
{
   for (int i = 0; i < constant->num; i++)
      code[i] = _mesa_float_to_half(constant->value[i].f);
}

/* Encode a discard as the fixed three-word branch pattern. */
static void ppir_codegen_encode_discard(ppir_node *node, void *code)
{
   ppir_codegen_field_branch *b = code;
   assert(node->op == ppir_op_discard);

   b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
   b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
   b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
}

/* Encode the branch slot.  Conditional branches compare two scalar
 * registers with a gt/eq/lt condition mask; unconditional branches
 * set all three condition bits.  The target is a word offset relative
 * to the current instruction. */
static void ppir_codegen_encode_branch(ppir_node *node, void *code)
{
   ppir_codegen_field_branch *b = code;
   ppir_branch_node *branch;
   ppir_instr *target_instr;
   ppir_block *target;
   if (node->op == ppir_op_discard) {
      ppir_codegen_encode_discard(node, code);
      return;
   }

   assert(node->op == ppir_op_branch);
   branch = ppir_node_to_branch(node);

   b->branch.unknown_0 = 0x0;
   b->branch.unknown_1 = 0x0;

   if (branch->num_src == 2) {
      b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
      b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
      b->branch.cond_gt = branch->cond_gt;
      b->branch.cond_eq = branch->cond_eq;
      b->branch.cond_lt = branch->cond_lt;
   } else if (branch->num_src == 0) {
      /* Unconditional branch */
      b->branch.arg0_source = 0;
      b->branch.arg1_source = 0;
      b->branch.cond_gt = true;
      b->branch.cond_eq = true;
      b->branch.cond_lt = true;
   } else {
      assert(false);
   }

   /* skip empty blocks until a block with instructions is found */
   target = branch->target;
   while (list_is_empty(&target->instr_list)) {
      if (!target->list.next)
         break;
      target = LIST_ENTRY(ppir_block, target->list.next, list);
   }

   assert(!list_is_empty(&target->instr_list));

   target_instr = list_first_entry(&target->instr_list, ppir_instr, list);
   b->branch.target = target_instr->offset - node->instr->offset;
   /* prefetch hint: size of the instruction at the branch target */
   b->branch.next_count = target_instr->encode_size;
}

typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);

/* Per-slot encoder dispatch, indexed by PPIR_INSTR_SLOT_*. */
static const ppir_codegen_instr_slot_encode_func
ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
   [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
   [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
   [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
   [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
   [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
   [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
   [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
   [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
   [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
   [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
};

/* Bit width of each slot's encoded field, same slot order as above. */
static const int ppir_codegen_field_size[] = {
   34, 62, 41, 43, 30, 44, 31, 30, 41, 73
};

/* Round a bit count up to 32-bit words. */
static inline int align_to_word(int size)
{
   return ((size + 0x1f) >> 5);
}

/* Size of an encoded instruction in 32-bit words: sum of the used
 * slot fields plus 64 bits per embedded constant vector, rounded up,
 * plus one word for the control word. */
static int get_instr_encode_size(ppir_instr *instr)
{
   int size = 0;

   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
      if (instr->slots[i])
         size += ppir_codegen_field_size[i];
   }

   for (int i = 0; i < 2; i++) {
      if (instr->constant[i].num)
         size += 64;
   }

   return align_to_word(size) + 1;
}

/* OR `src_size` bits from src into dst starting at bit dst_offset.
 * dst must already be zeroed (rzalloc'd); only the unaligned path
 * shifts word-by-word, the aligned path is a plain memcpy. */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   int off1 = dst_offset & 0x1f;
   uint32_t *cpy_dst = dst, *cpy_src = src;

   cpy_dst += (dst_offset >> 5);

   if (off1) {
      int off2 = 32 - off1;
      int cpy_size = 0;
      while (1) {
         /* low part of the source word into the upper bits of dst */
         *cpy_dst |= *cpy_src << off1;
         cpy_dst++;

         cpy_size += off2;
         if (cpy_size >= src_size)
            break;

         /* high part of the source word into the lower bits of dst */
         *cpy_dst |= *cpy_src >> off2;
         cpy_src++;

         cpy_size += off1;
         if (cpy_size >= src_size)
            break;
      }
   }
   else
      memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4);
}

/* Emit one instruction at `code`: control word followed by the
 * bit-packed slot fields and embedded constants.  Updates the
 * previous instruction's control word for prefetch.  Returns the
 * encoded size in words. */
static int encode_instr(ppir_instr *instr, void *code, void *last_code)
{
   int size = 0;
   ppir_codegen_ctrl *ctrl = code;

   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
      if (instr->slots[i]) {
         /* max field size (73), align to dword */
         uint8_t output[12] = {0};

         ppir_codegen_encode_slot[i](instr->slots[i], output);
         bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);

         size += ppir_codegen_field_size[i];
         ctrl->fields |= 1 << i;
      }
   }

   /* texld and derivative ops need cross-quad synchronization */
   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
      ctrl->sync = true;

   if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
         ctrl->sync = true;
   }

   if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
         ctrl->sync = true;
   }

   for (int i = 0; i < 2; i++) {
      if (instr->constant[i].num) {
         uint16_t output[4] = {0};

         ppir_codegen_encode_const(instr->constant + i, output);
         bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);

         /* constants always occupy a full 64-bit (vec4 fp16) field */
         size += 64;
         ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
      }
   }

   size = align_to_word(size) + 1;

   ctrl->count = size;
   if (instr->is_end)
      ctrl->stop = true;

   if (last_code) {
      ppir_codegen_ctrl *last_ctrl = last_code;
      last_ctrl->next_count = size;
      last_ctrl->prefetch = true;
   }

   return size;
}

/* Debug dump: raw instruction words plus disassembly for each
 * instruction in program order. */
static void ppir_codegen_print_prog(ppir_compiler *comp)
{
   uint32_t *prog = comp->prog->shader;
   unsigned offset = 0;

   printf("========ppir codegen========\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         printf("%03d (@%6d): ", instr->index, instr->offset);
         /* low 5 bits of the control word hold the word count */
         int n = prog[0] & 0x1f;
         for (int i = 0; i < n; i++) {
            if (i && i % 6 == 0)
               printf("\n    ");
            printf("%08x ", prog[i]);
         }
         printf("\n");
         ppir_disassemble_instr(prog, offset, stdout);
         prog += n;
         offset += n;
      }
   }
   printf("-----------------------\n");
}

/* Final codegen entry point: first pass computes each instruction's
 * offset and encoded size (needed for branch targets and prefetch),
 * second pass encodes into a zeroed buffer that becomes the shader
 * binary.  Returns false on allocation failure. */
bool ppir_codegen_prog(ppir_compiler *comp)
{
   int size = 0;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         instr->offset = size;
         instr->encode_size = get_instr_encode_size(instr);
         size += instr->encode_size;
      }
   }

   /* zero-initialized: bitcopy ORs bits in and assumes zeroed dest */
   uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
   if (!prog)
      return false;

   uint32_t *code = prog, *last_code = NULL;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         int offset = encode_instr(instr, code, last_code);
         last_code = code;
         code += offset;
      }
   }

   if (comp->prog->shader)
      ralloc_free(comp->prog->shader);

   comp->prog->shader = prog;
   comp->prog->state.shader_size = size * sizeof(uint32_t);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_codegen_print_prog(comp);

   return true;
}