Lines Matching refs:instr

84                                       aco::Instruction* instr) -> void
94 aco_print_instr(instr, memf);
114 for (aco_ptr<Instruction>& instr : block.instructions) {
117 Format base_format = instr->format;
127 if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
128 instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
129 instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
130 instr->opcode == aco_opcode::v_interp_p2_f16) {
139 check(base_format == instr_info.format[(int)instr->opcode],
140 "Wrong base format for instruction", instr.get());
143 if (instr->isVOP3() && instr->format != Format::VOP3) {
146 "Format cannot have VOP3/VOP3B applied", instr.get());
150 if (instr->isSDWA()) {
153 "Format cannot have SDWA applied", instr.get());
155 check(program->chip_class >= GFX8, "SDWA is GFX8+ only", instr.get());
157 SDWA_instruction& sdwa = instr->sdwa();
159 "SDWA omod only supported on GFX9+", instr.get());
162 "SDWA VOPC clamp only supported on GFX8", instr.get());
163 check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
165 "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
167 const Definition& def = instr->definitions[0];
169 instr.get());
171 "SDWA definition selection size must be at most definition size", instr.get());
174 "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
176 instr.get());
179 instr.get());
181 "SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
184 for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
185 const Operand& op = instr->operands[i];
186 check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
188 "SDWA operand selection size must be at most operand size", instr.get());
190 "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
192 instr.get());
194 if (instr->operands.size() >= 3) {
195 check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
196 "3rd operand must be fixed to vcc with SDWA", instr.get());
198 if (instr->definitions.size() >= 2) {
199 check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
200 "2nd definition must be fixed to vcc with SDWA", instr.get());
204 instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&
205 instr->opcode != aco_opcode::v_fmamk_f32 &&
206 instr->opcode != aco_opcode::v_fmaak_f32 &&
207 instr->opcode != aco_opcode::v_fmamk_f16 &&
208 instr->opcode != aco_opcode::v_fmaak_f16 &&
209 instr->opcode != aco_opcode::v_madmk_f32 &&
210 instr->opcode != aco_opcode::v_madak_f32 &&
211 instr->opcode != aco_opcode::v_madmk_f16 &&
212 instr->opcode != aco_opcode::v_madak_f16 &&
213 instr->opcode != aco_opcode::v_readfirstlane_b32 &&
214 instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;
218 (instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
220 check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
224 if (instr->isVOP3()) {
225 VOP3_instruction& vop3 = instr->vop3();
227 "Opsel is only supported on GFX9+", instr.get());
230 if (i >= instr->operands.size() ||
231 (instr->operands[i].hasRegClass() &&
232 instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
233 check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
235 if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
237 instr.get());
238 } else if (instr->isVOP3P()) {
239 VOP3P_instruction& vop3p = instr->vop3p();
240 for (unsigned i = 0; i < instr->operands.size(); i++) {
241 if (instr->operands[i].hasRegClass() &&
242 instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
244 "Unexpected opsel for subdword operand", instr.get());
246 check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition",
247 instr.get());
251 for (unsigned i = 0; i < instr->operands.size(); i++) {
252 if (instr->operands[i].isUndefined()) {
253 bool flat = instr->isFlatLike();
254 bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
255 instr->opcode == aco_opcode::p_create_vector ||
256 (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
257 ((instr->isMUBUF() || instr->isMTBUF()) && i == 1);
258 check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
260 check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||
261 instr->operands[i].isConstant(),
262 "Uninitialized Operand", instr.get());
267 for (unsigned i = 0; i < instr->definitions.size(); i++) {
268 if (instr->definitions[i].regClass().is_subdword())
269 check(instr->isPseudo() || instr->definitions[i].bytes() <= 4,
271 instr.get());
274 if (instr->isSALU() || instr->isVALU()) {
277 for (unsigned i = 0; i < instr->operands.size(); i++) {
278 Operand op = instr->operands[i];
282 check(!instr->isDPP() && !instr->isSDWA() &&
283 (!instr->isVOP3() || program->chip_class >= GFX10) &&
284 (!instr->isVOP3P() || program->chip_class >= GFX10),
285 "Literal applied on wrong instruction format", instr.get());
289 "Only 1 Literal allowed", instr.get());
291 check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,
292 "Wrong source position for Literal argument", instr.get());
296 if (instr->isVALU()) {
297 bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
298 instr->opcode == aco_opcode::v_lshrrev_b64 ||
299 instr->opcode == aco_opcode::v_ashrrev_i64;
304 uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;
305 if (instr->isSDWA())
307 else if (instr->isDPP())
310 if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||
311 instr->opcode == aco_opcode::v_readlane_b32 ||
312 instr->opcode == aco_opcode::v_readlane_b32_e64) {
313 check(instr->definitions[0].getTemp().type() == RegType::sgpr,
314 "Wrong Definition type for VALU instruction", instr.get());
316 check(instr->definitions[0].getTemp().type() == RegType::vgpr,
317 "Wrong Definition type for VALU instruction", instr.get());
322 for (unsigned i = 0; i < instr->operands.size(); i++) {
323 Operand op = instr->operands[i];
324 if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
325 instr->opcode == aco_opcode::v_readlane_b32 ||
326 instr->opcode == aco_opcode::v_readlane_b32_e64) {
329 "Must be a SGPR or a constant", instr.get());
332 "Wrong Operand type for VALU instruction", instr.get());
335 if (instr->opcode == aco_opcode::v_permlane16_b32 ||
336 instr->opcode == aco_opcode::v_permlanex16_b32) {
338 "Operand 0 of v_permlane must be VGPR", instr.get());
342 instr.get());
345 if (instr->opcode == aco_opcode::v_writelane_b32 ||
346 instr->opcode == aco_opcode::v_writelane_b32_e64) {
349 "Wrong Operand type for VALU instruction", instr.get());
352 "Must be a SGPR or a constant", instr.get());
355 if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) {
357 instr.get());
367 instr.get());
370 "Too many SGPRs/literals", instr.get());
373 if (instr->isSOP1() || instr->isSOP2()) {
374 check(instr->definitions[0].getTemp().type() == RegType::sgpr,
375 "Wrong Definition type for SALU instruction", instr.get());
376 for (const Operand& op : instr->operands) {
378 "Wrong Operand type for SALU instruction", instr.get());
383 switch (instr->format) {
385 if (instr->opcode == aco_opcode::p_create_vector) {
387 for (const Operand& op : instr->operands) {
388 check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
391 check(size == instr->definitions[0].bytes(),
392 "Definition size does not match operand sizes", instr.get());
393 if (instr->definitions[0].getTemp().type() == RegType::sgpr) {
394 for (const Operand& op : instr->operands) {
396 "Wrong Operand type for scalar vector", instr.get());
399 } else if (instr->opcode == aco_opcode::p_extract_vector) {
400 check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(),
401 "Wrong Operand types", instr.get());
402 check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=
403 instr->operands[0].bytes(),
404 "Index out of range", instr.get());
405 check(instr->definitions[0].getTemp().type() == RegType::vgpr ||
406 instr->operands[0].regClass().type() == RegType::sgpr,
407 "Cannot extract SGPR value from VGPR vector", instr.get());
409 !instr->definitions[0].regClass().is_subdword() ||
410 instr->operands[0].regClass().type() == RegType::vgpr,
411 "Cannot extract subdword from SGPR before GFX9+", instr.get());
412 } else if (instr->opcode == aco_opcode::p_split_vector) {
413 check(instr->operands[0].isTemp(), "Operand must be a temporary", instr.get());
415 for (const Definition& def : instr->definitions) {
418 check(size == instr->operands[0].bytes(),
419 "Operand size does not match definition sizes", instr.get());
420 if (instr->operands[0].getTemp().type() == RegType::vgpr) {
421 for (const Definition& def : instr->definitions)
423 "Wrong Definition type for VGPR split_vector", instr.get());
425 for (const Definition& def : instr->definitions)
427 "Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
429 } else if (instr->opcode == aco_opcode::p_parallelcopy) {
430 check(instr->definitions.size() == instr->operands.size(),
431 "Number of Operands does not match number of Definitions", instr.get());
432 for (unsigned i = 0; i < instr->operands.size(); i++) {
433 check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
434 "Operand and Definition size must match", instr.get());
435 if (instr->operands[i].isTemp()) {
436 check((instr->definitions[i].getTemp().type() ==
437 instr->operands[i].regClass().type()) ||
438 (instr->definitions[i].getTemp().type() == RegType::vgpr &&
439 instr->operands[i].regClass().type() == RegType::sgpr),
440 "Operand and Definition types do not match", instr.get());
441 check(instr->definitions[i].regClass().is_linear_vgpr() ==
442 instr->operands[i].regClass().is_linear_vgpr(),
443 "Operand and Definition types do not match", instr.get());
445 check(!instr->definitions[i].regClass().is_linear_vgpr(),
447 instr.get());
450 } else if (instr->opcode == aco_opcode::p_phi) {
451 check(instr->operands.size() == block.logical_preds.size(),
452 "Number of Operands does not match number of predecessors", instr.get());
453 check(instr->definitions[0].getTemp().type() == RegType::vgpr,
454 "Logical Phi Definition must be vgpr", instr.get());
455 for (const Operand& op : instr->operands)
456 check(instr->definitions[0].size() == op.size(),
457 "Operand sizes must match Definition size", instr.get());
458 } else if (instr->opcode == aco_opcode::p_linear_phi) {
459 for (const Operand& op : instr->operands) {
461 instr.get());
462 check(instr->definitions[0].size() == op.size(),
463 "Operand sizes must match Definition size", instr.get());
465 check(instr->operands.size() == block.linear_preds.size(),
466 "Number of Operands does not match number of predecessors", instr.get());
467 } else if (instr->opcode == aco_opcode::p_extract ||
468 instr->opcode == aco_opcode::p_insert) {
469 check(instr->operands[0].isTemp(), "Data operand must be temporary", instr.get());
470 check(instr->operands[1].isConstant(), "Index must be constant", instr.get());
471 if (instr->opcode == aco_opcode::p_extract)
472 check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",
473 instr.get());
475 check(instr->definitions[0].getTemp().type() != RegType::sgpr ||
476 instr->operands[0].getTemp().type() == RegType::sgpr,
477 "Can't extract/insert VGPR to SGPR", instr.get());
479 if (instr->opcode == aco_opcode::p_insert)
480 check(instr->operands[0].bytes() == instr->definitions[0].bytes(),
481 "Sizes of p_insert data operand and definition must match", instr.get());
483 if (instr->definitions[0].getTemp().type() == RegType::sgpr)
484 check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&
485 instr->definitions[1].physReg() == scc,
486 "SGPR extract/insert needs an SCC definition", instr.get());
488 unsigned data_bits = instr->operands[0].getTemp().bytes() * 8u;
489 unsigned op_bits = instr->operands[2].constantValue();
491 if (instr->opcode == aco_opcode::p_insert) {
492 check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get());
493 check(op_bits < data_bits, "Size must be smaller than source", instr.get());
494 } else if (instr->opcode == aco_opcode::p_extract) {
496 "Size must be 8 or 16 or 32", instr.get());
498 instr.get());
502 check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
503 instr.get());
508 for (const Operand& op : instr->operands)
511 instr.get());
513 if (instr->opcode == aco_opcode::p_reduce &&
514 instr->reduction().cluster_size == program->wave_size)
515 check(instr->definitions[0].regClass().type() == RegType::sgpr ||
517 "The result of unclustered reductions must go into an SGPR.", instr.get());
519 check(instr->definitions[0].regClass().type() == RegType::vgpr,
521 instr.get());
526 if (instr->operands.size() >= 1)
527 check((instr->operands[0].isFixed() && !instr->operands[0].isConstant()) ||
528 (instr->operands[0].isTemp() &&
529 instr->operands[0].regClass().type() == RegType::sgpr),
530 "SMEM operands must be sgpr", instr.get());
531 if (instr->operands.size() >= 2)
532 check(instr->operands[1].isConstant() ||
533 (instr->operands[1].isTemp() &&
534 instr->operands[1].regClass().type() == RegType::sgpr),
535 "SMEM offset must be constant or sgpr", instr.get());
536 if (!instr->definitions.empty())
537 check(instr->definitions[0].getTemp().type() == RegType::sgpr,
538 "SMEM result must be sgpr", instr.get());
543 check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",
544 instr.get());
545 check(instr->operands[1].hasRegClass() &&
546 instr->operands[1].regClass().type() == RegType::vgpr,
547 "VADDR must be in vgpr for VMEM instructions", instr.get());
549 instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr,
550 "VMEM resource constant must be sgpr", instr.get());
551 check(instr->operands.size() < 4 ||
552 (instr->operands[3].isTemp() &&
553 instr->operands[3].regClass().type() == RegType::vgpr),
554 "VMEM write data must be vgpr", instr.get());
558 check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",
559 instr.get());
560 check(instr->operands[0].hasRegClass() &&
561 (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
562 "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
563 if (instr->operands[1].hasRegClass())
564 check(instr->operands[1].regClass() == s4,
565 "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
566 if (!instr->operands[2].isUndefined()) {
567 bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||
568 instr->opcode == aco_opcode::image_atomic_fcmpswap;
569 check(instr->definitions.empty() ||
570 (instr->definitions[0].regClass() == instr->operands[2].regClass() ||
574 instr.get());
576 check(instr->operands.size() == 4 || program->chip_class >= GFX10,
577 "NSA is only supported on GFX10+", instr.get());
578 for (unsigned i = 3; i < instr->operands.size(); i++) {
579 if (instr->operands.size() == 4) {
580 check(instr->operands[i].hasRegClass() &&
581 instr->operands[i].regClass().type() == RegType::vgpr,
582 "MIMG operands[3] (VADDR) must be VGPR", instr.get());
584 check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used",
585 instr.get());
588 check(instr->definitions.empty() ||
589 (instr->definitions[0].isTemp() &&
590 instr->definitions[0].regClass().type() == RegType::vgpr),
591 "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
595 for (const Operand& op : instr->operands) {
597 "Only VGPRs are valid DS instruction operands", instr.get());
599 if (!instr->definitions.empty())
600 check(instr->definitions[0].getTemp().type() == RegType::vgpr,
601 "DS instruction must return VGPR", instr.get());
606 check(instr->operands[i].hasRegClass() &&
607 instr->operands[i].regClass().type() == RegType::vgpr,
608 "Only VGPRs are valid Export arguments", instr.get());
612 check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",
613 instr.get());
618 instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr,
619 "FLAT/GLOBAL/SCRATCH address must be vgpr", instr.get());
620 check(instr->operands[1].hasRegClass() &&
621 instr->operands[1].regClass().type() == RegType::sgpr,
622 "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
623 if (!instr->definitions.empty())
624 check(instr->definitions[0].getTemp().type() == RegType::vgpr,
625 "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
627 check(instr->operands[2].regClass().type() == RegType::vgpr,
628 "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
673 Location() : block(NULL), instr(NULL) {}
676 Instruction* instr; // NULL if it's the block's live-in
701 if (loc.instr) {
702 aco_print_instr(loc.instr, memf);
709 aco_print_instr(loc2.instr, memf);
721 validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& instr, unsigned index)
723 Operand op = instr->operands[index];
726 if (instr->opcode == aco_opcode::p_as_uniform)
728 if (instr->isPseudo() && chip >= GFX8)
730 if (instr->isSDWA())
731 return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
732 byte % instr->sdwa().sel[index].size() == 0;
733 if (instr->isVOP3P())
734 return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
735 ((instr->vop3p().opsel_hi >> index) & 1) == (byte >> 1);
736 if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1))
739 switch (instr->opcode) {
778 validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& instr)
780 Definition def = instr->definitions[0];
783 if (instr->isPseudo() && chip >= GFX8)
785 if (instr->isSDWA())
786 return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
787 byte % instr->sdwa().dst_sel.size() == 0;
788 if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1))
791 switch (instr->opcode) {
809 get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
812 Definition def = instr->definitions[index];
814 if (instr->isPseudo())
816 if (instr->isVALU()) {
818 if (instr->isSDWA())
819 return instr->sdwa().dst_sel.size();
821 if (instr_is_16bit(chip, instr->opcode))
827 switch (instr->opcode) {
869 for (aco_ptr<Instruction>& instr : block.instructions) {
870 if (instr->opcode == aco_opcode::p_phi) {
871 for (unsigned i = 0; i < instr->operands.size(); i++) {
872 if (instr->operands[i].isTemp() &&
873 instr->operands[i].getTemp().type() == RegType::sgpr &&
874 instr->operands[i].isFirstKill())
875 phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
879 loc.instr = instr.get();
880 for (unsigned i = 0; i < instr->operands.size(); i++) {
881 Operand& op = instr->operands[i];
901 !validate_subdword_operand(program->chip_class, instr, i))
909 for (unsigned i = 0; i < instr->definitions.size(); i++) {
910 Definition& def = instr->definitions[i];
930 !validate_subdword_definition(program->chip_class, instr))
969 aco_ptr<Instruction>& instr = *it;
972 if (instr->opcode == aco_opcode::p_logical_end) {
986 for (const Definition& def : instr->definitions) {
994 if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
995 for (const Operand& op : instr->operands) {
1009 for (aco_ptr<Instruction>& instr : block.instructions) {
1010 loc.instr = instr.get();
1013 if (instr->opcode == aco_opcode::p_logical_end) {
1021 if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1022 for (const Operand& op : instr->operands) {
1032 for (unsigned i = 0; i < instr->definitions.size(); i++) {
1033 Definition& def = instr->definitions[i];
1047 unsigned written = get_subdword_bytes_written(program, instr, i);
1061 for (const Definition& def : instr->definitions) {
1070 if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1071 for (const Operand& op : instr->operands) {