1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef VC4_QIR_H 25#define VC4_QIR_H 26 27#include <assert.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <stdbool.h> 31#include <stdint.h> 32#include <string.h> 33 34#include "util/macros.h" 35#include "compiler/nir/nir.h" 36#include "util/list.h" 37#include "util/u_math.h" 38 39#include "vc4_screen.h" 40#include "vc4_qpu_defines.h" 41#include "vc4_qpu.h" 42#include "kernel/vc4_packet.h" 43#include "pipe/p_state.h" 44 45struct nir_builder; 46 47enum qfile { 48 QFILE_NULL, 49 QFILE_TEMP, 50 QFILE_VARY, 51 QFILE_UNIF, 52 QFILE_VPM, 53 QFILE_TLB_COLOR_WRITE, 54 QFILE_TLB_COLOR_WRITE_MS, 55 QFILE_TLB_Z_WRITE, 56 QFILE_TLB_STENCIL_SETUP, 57 58 /* If tex_s is written on its own without preceding t/r/b setup, it's 59 * a direct memory access using the input value, without the sideband 60 * uniform load. We represent these in QIR as a separate write 61 * destination so we can tell if the sideband uniform is present. 62 */ 63 QFILE_TEX_S_DIRECT, 64 65 QFILE_TEX_S, 66 QFILE_TEX_T, 67 QFILE_TEX_R, 68 QFILE_TEX_B, 69 70 /* Payload registers that aren't in the physical register file, so we 71 * can just use the corresponding qpu_reg at qpu_emit time. 72 */ 73 QFILE_FRAG_X, 74 QFILE_FRAG_Y, 75 QFILE_FRAG_REV_FLAG, 76 QFILE_QPU_ELEMENT, 77 78 /** 79 * Stores an immediate value in the index field that will be used 80 * directly by qpu_load_imm(). 81 */ 82 QFILE_LOAD_IMM, 83 84 /** 85 * Stores an immediate value in the index field that can be turned 86 * into a small immediate field by qpu_encode_small_immediate(). 87 */ 88 QFILE_SMALL_IMM, 89}; 90 91struct qreg { 92 enum qfile file; 93 uint32_t index; 94 int pack; 95}; 96 97static inline struct qreg qir_reg(enum qfile file, uint32_t index) 98{ 99 return (struct qreg){file, index}; 100} 101 102enum qop { 103 QOP_UNDEF, 104 QOP_MOV, 105 QOP_FMOV, 106 QOP_MMOV, 107 QOP_FADD, 108 QOP_FSUB, 109 QOP_FMUL, 110 QOP_V8MULD, 111 QOP_V8MIN, 112 QOP_V8MAX, 113 QOP_V8ADDS, 114 QOP_V8SUBS, 115 QOP_MUL24, 116 QOP_FMIN, 117 QOP_FMAX, 118 QOP_FMINABS, 119 QOP_FMAXABS, 120 QOP_ADD, 121 QOP_SUB, 122 QOP_SHL, 123 QOP_SHR, 124 QOP_ASR, 125 QOP_MIN, 126 QOP_MIN_NOIMM, 127 QOP_MAX, 128 QOP_AND, 129 QOP_OR, 130 QOP_XOR, 131 QOP_NOT, 132 133 QOP_FTOI, 134 QOP_ITOF, 135 QOP_RCP, 136 QOP_RSQ, 137 QOP_EXP2, 138 QOP_LOG2, 139 QOP_VW_SETUP, 140 QOP_VR_SETUP, 141 QOP_TLB_COLOR_READ, 142 QOP_MS_MASK, 143 QOP_VARY_ADD_C, 144 145 QOP_FRAG_Z, 146 QOP_FRAG_W, 147 148 /** 149 * Signal of texture read being necessary and then reading r4 into 150 * the destination 151 */ 152 QOP_TEX_RESULT, 153 154 /** 155 * Insert the signal for switching threads in a threaded fragment 156 * shader. No value can be live in an accumulator across a thrsw. 157 * 158 * At the QPU level, this will have several delay slots before the 159 * switch happens. Those slots are the responsibility of the 160 * scheduler. 161 */ 162 QOP_THRSW, 163 164 /* 32-bit immediate loaded to each SIMD channel */ 165 QOP_LOAD_IMM, 166 167 /* 32-bit immediate divided into 16 2-bit unsigned int values and 168 * loaded to each corresponding SIMD channel. 169 */ 170 QOP_LOAD_IMM_U2, 171 /* 32-bit immediate divided into 16 2-bit signed int values and 172 * loaded to each corresponding SIMD channel. 173 */ 174 QOP_LOAD_IMM_I2, 175 176 QOP_ROT_MUL, 177 178 /* Jumps to block->successor[0] if the qinst->cond (as a 179 * QPU_COND_BRANCH_*) passes, or block->successor[1] if not. Note 180 * that block->successor[1] may be unset if the condition is ALWAYS. 181 */ 182 QOP_BRANCH, 183 184 /* Emits an ADD from src[0] to src[1], where src[0] must be a 185 * QOP_LOAD_IMM result and src[1] is a QUNIFORM_UNIFORMS_ADDRESS, 186 * required by the kernel as part of its branch validation. 187 */ 188 QOP_UNIFORMS_RESET, 189}; 190 191struct queued_qpu_inst { 192 struct list_head link; 193 uint64_t inst; 194}; 195 196struct qinst { 197 struct list_head link; 198 199 enum qop op; 200 struct qreg dst; 201 struct qreg src[3]; 202 bool sf; 203 bool cond_is_exec_mask; 204 uint8_t cond; 205}; 206 207enum qstage { 208 /** 209 * Coordinate shader, runs during binning, before the VS, and just 210 * outputs position. 211 */ 212 QSTAGE_COORD, 213 QSTAGE_VERT, 214 QSTAGE_FRAG, 215}; 216 217enum quniform_contents { 218 /** 219 * Indicates that a constant 32-bit value is copied from the program's 220 * uniform contents. 221 */ 222 QUNIFORM_CONSTANT, 223 /** 224 * Indicates that the program's uniform contents are used as an index 225 * into the GL uniform storage. 226 */ 227 QUNIFORM_UNIFORM, 228 229 /** @{ 230 * Scaling factors from clip coordinates to relative to the viewport 231 * center. 232 * 233 * This is used by the coordinate and vertex shaders to produce the 234 * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 235 * point offsets from the viewport ccenter. 236 */ 237 QUNIFORM_VIEWPORT_X_SCALE, 238 QUNIFORM_VIEWPORT_Y_SCALE, 239 /** @} */ 240 241 QUNIFORM_VIEWPORT_Z_OFFSET, 242 QUNIFORM_VIEWPORT_Z_SCALE, 243 244 QUNIFORM_USER_CLIP_PLANE, 245 246 /** 247 * A reference to a texture config parameter 0 uniform. 248 * 249 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 250 * defines texture type, miplevels, and such. It will be found as a 251 * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 252 */ 253 QUNIFORM_TEXTURE_CONFIG_P0, 254 255 /** 256 * A reference to a texture config parameter 1 uniform. 257 * 258 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 259 * defines texture width, height, filters, and wrap modes. It will be 260 * found as a parameter to the second QOP_TEX_[STRB] instruction in a 261 * sequence. 262 */ 263 QUNIFORM_TEXTURE_CONFIG_P1, 264 265 /** A reference to a texture config parameter 2 cubemap stride uniform */ 266 QUNIFORM_TEXTURE_CONFIG_P2, 267 268 QUNIFORM_TEXTURE_FIRST_LEVEL, 269 270 QUNIFORM_TEXTURE_MSAA_ADDR, 271 272 QUNIFORM_UBO0_ADDR, 273 QUNIFORM_UBO1_ADDR, 274 275 QUNIFORM_TEXRECT_SCALE_X, 276 QUNIFORM_TEXRECT_SCALE_Y, 277 278 QUNIFORM_TEXTURE_BORDER_COLOR, 279 280 QUNIFORM_BLEND_CONST_COLOR_X, 281 QUNIFORM_BLEND_CONST_COLOR_Y, 282 QUNIFORM_BLEND_CONST_COLOR_Z, 283 QUNIFORM_BLEND_CONST_COLOR_W, 284 QUNIFORM_BLEND_CONST_COLOR_RGBA, 285 QUNIFORM_BLEND_CONST_COLOR_AAAA, 286 287 QUNIFORM_STENCIL, 288 289 QUNIFORM_ALPHA_REF, 290 QUNIFORM_SAMPLE_MASK, 291 292 /* Placeholder uniform that will be updated by the kernel when used by 293 * an instruction writing to QPU_W_UNIFORMS_ADDRESS. 294 */ 295 QUNIFORM_UNIFORMS_ADDRESS, 296}; 297 298struct vc4_varying_slot { 299 uint8_t slot; 300 uint8_t swizzle; 301}; 302 303struct vc4_key { 304 struct vc4_uncompiled_shader *shader_state; 305 struct { 306 enum pipe_format format; 307 uint8_t swizzle[4]; 308 union { 309 struct { 310 unsigned compare_mode:1; 311 unsigned compare_func:3; 312 unsigned wrap_s:3; 313 unsigned wrap_t:3; 314 bool force_first_level:1; 315 }; 316 struct { 317 uint16_t msaa_width, msaa_height; 318 }; 319 }; 320 } tex[VC4_MAX_TEXTURE_SAMPLERS]; 321 uint8_t ucp_enables; 322}; 323 324struct vc4_fs_key { 325 struct vc4_key base; 326 enum pipe_format color_format; 327 bool depth_enabled; 328 bool stencil_enabled; 329 bool stencil_twoside; 330 bool stencil_full_writemasks; 331 bool is_points; 332 bool is_lines; 333 bool point_coord_upper_left; 334 bool light_twoside; 335 bool msaa; 336 bool sample_coverage; 337 bool sample_alpha_to_coverage; 338 bool sample_alpha_to_one; 339 uint8_t alpha_test_func; 340 uint8_t logicop_func; 341 uint32_t point_sprite_mask; 342 uint32_t ubo_1_size; 343 344 struct pipe_rt_blend_state blend; 345}; 346 347struct vc4_vs_key { 348 struct vc4_key base; 349 350 const struct vc4_fs_inputs *fs_inputs; 351 enum pipe_format attr_formats[8]; 352 bool is_coord; 353 bool per_vertex_point_size; 354 bool clamp_color; 355}; 356 357/** A basic block of QIR intructions. */ 358struct qblock { 359 struct list_head link; 360 361 struct list_head instructions; 362 struct list_head qpu_inst_list; 363 364 struct set *predecessors; 365 struct qblock *successors[2]; 366 367 int index; 368 369 /* Instruction IPs for the first and last instruction of the block. 370 * Set by vc4_qpu_schedule.c. 371 */ 372 uint32_t start_qpu_ip; 373 uint32_t end_qpu_ip; 374 375 /* Instruction IP for the branch instruction of the block. Set by 376 * vc4_qpu_schedule.c. 377 */ 378 uint32_t branch_qpu_ip; 379 380 /** @{ used by vc4_qir_live_variables.c */ 381 BITSET_WORD *def; 382 BITSET_WORD *use; 383 BITSET_WORD *live_in; 384 BITSET_WORD *live_out; 385 int start_ip, end_ip; 386 /** @} */ 387}; 388 389struct vc4_compile { 390 struct vc4_context *vc4; 391 nir_shader *s; 392 nir_function_impl *impl; 393 struct exec_list *cf_node_list; 394 395 /** 396 * Mapping from nir_register * or nir_ssa_def * to array of struct 397 * qreg for the values. 398 */ 399 struct hash_table *def_ht; 400 401 /* For each temp, the instruction generating its value. */ 402 struct qinst **defs; 403 uint32_t defs_array_size; 404 405 /** 406 * Inputs to the shader, arranged by TGSI declaration order. 407 * 408 * Not all fragment shader QFILE_VARY reads are present in this array. 409 */ 410 struct qreg *inputs; 411 struct qreg *outputs; 412 bool msaa_per_sample_output; 413 struct qreg color_reads[VC4_MAX_SAMPLES]; 414 struct qreg sample_colors[VC4_MAX_SAMPLES]; 415 uint32_t inputs_array_size; 416 uint32_t outputs_array_size; 417 uint32_t uniforms_array_size; 418 419 /* State for whether we're executing on each channel currently. 0 if 420 * yes, otherwise a block number + 1 that the channel jumped to. 421 */ 422 struct qreg execute; 423 424 struct qreg line_x, point_x, point_y; 425 /** boolean (~0 -> true) if the fragment has been discarded. */ 426 struct qreg discard; 427 struct qreg payload_FRAG_Z; 428 struct qreg payload_FRAG_W; 429 430 uint8_t vattr_sizes[8]; 431 432 /** 433 * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads. 434 * 435 * This includes those that aren't part of the VPM varyings, like 436 * point/line coordinates. 437 */ 438 struct vc4_varying_slot *input_slots; 439 uint32_t num_input_slots; 440 uint32_t input_slots_array_size; 441 442 /** 443 * An entry per outputs[] in the VS indicating what the VARYING_SLOT_* 444 * of the output is. Used to emit from the VS in the order that the 445 * FS needs. 446 */ 447 struct vc4_varying_slot *output_slots; 448 449 struct pipe_shader_state *shader_state; 450 struct vc4_key *key; 451 struct vc4_fs_key *fs_key; 452 struct vc4_vs_key *vs_key; 453 454 /* Live ranges of temps. */ 455 int *temp_start, *temp_end; 456 457 uint32_t *uniform_data; 458 enum quniform_contents *uniform_contents; 459 uint32_t uniform_array_size; 460 uint32_t num_uniforms; 461 uint32_t num_outputs; 462 uint32_t num_texture_samples; 463 uint32_t output_position_index; 464 uint32_t output_color_index; 465 uint32_t output_point_size_index; 466 uint32_t output_sample_mask_index; 467 468 struct qreg undef; 469 enum qstage stage; 470 uint32_t num_temps; 471 472 struct list_head blocks; 473 int next_block_index; 474 struct qblock *cur_block; 475 struct qblock *loop_cont_block; 476 struct qblock *loop_break_block; 477 struct qblock *last_top_block; 478 479 struct list_head qpu_inst_list; 480 481 /* Pre-QPU-scheduled instruction containing the last THRSW */ 482 uint64_t *last_thrsw; 483 484 uint64_t *qpu_insts; 485 uint32_t qpu_inst_count; 486 uint32_t qpu_inst_size; 487 uint32_t num_inputs; 488 489 /** 490 * Number of inputs from num_inputs remaining to be queued to the read 491 * FIFO in the VS/CS. 492 */ 493 uint32_t num_inputs_remaining; 494 495 /* Number of inputs currently in the read FIFO for the VS/CS */ 496 uint32_t num_inputs_in_fifo; 497 498 /** Next offset in the VPM to read from in the VS/CS */ 499 uint32_t vpm_read_offset; 500 501 uint32_t program_id; 502 uint32_t variant_id; 503 504 /* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH 505 * is used to hide texturing latency at the cost of limiting ourselves 506 * to the bottom half of physical reg space. 507 */ 508 bool fs_threaded; 509 510 bool last_thrsw_at_top_level; 511 512 bool failed; 513}; 514 515/* Special nir_load_input intrinsic index for loading the current TLB 516 * destination color. 517 */ 518#define VC4_NIR_TLB_COLOR_READ_INPUT 2000000000 519 520#define VC4_NIR_MS_MASK_OUTPUT 2000000000 521 522struct vc4_compile *qir_compile_init(void); 523void qir_compile_destroy(struct vc4_compile *c); 524struct qblock *qir_new_block(struct vc4_compile *c); 525void qir_set_emit_block(struct vc4_compile *c, struct qblock *block); 526void qir_link_blocks(struct qblock *predecessor, struct qblock *successor); 527struct qblock *qir_entry_block(struct vc4_compile *c); 528struct qblock *qir_exit_block(struct vc4_compile *c); 529struct qinst *qir_inst(enum qop op, struct qreg dst, 530 struct qreg src0, struct qreg src1); 531void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst); 532struct qreg qir_uniform(struct vc4_compile *c, 533 enum quniform_contents contents, 534 uint32_t data); 535void qir_schedule_instructions(struct vc4_compile *c); 536void qir_reorder_uniforms(struct vc4_compile *c); 537void qir_emit_uniform_stream_resets(struct vc4_compile *c); 538 539struct qreg qir_emit_def(struct vc4_compile *c, struct qinst *inst); 540struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst); 541 542struct qreg qir_get_temp(struct vc4_compile *c); 543void qir_calculate_live_intervals(struct vc4_compile *c); 544int qir_get_nsrc(struct qinst *inst); 545int qir_get_non_sideband_nsrc(struct qinst *inst); 546int qir_get_tex_uniform_src(struct qinst *inst); 547bool qir_reg_equals(struct qreg a, struct qreg b); 548bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); 549bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); 550bool qir_has_uniform_read(struct qinst *inst); 551bool qir_is_mul(struct qinst *inst); 552bool qir_is_raw_mov(struct qinst *inst); 553bool qir_is_tex(struct qinst *inst); 554bool qir_has_implicit_tex_uniform(struct qinst *inst); 555bool qir_is_float_input(struct qinst *inst); 556bool qir_depends_on_flags(struct qinst *inst); 557bool qir_writes_r4(struct qinst *inst); 558struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg); 559uint8_t qir_channels_written(struct qinst *inst); 560 561void qir_dump(struct vc4_compile *c); 562void qir_dump_inst(struct vc4_compile *c, struct qinst *inst); 563char *qir_describe_uniform(enum quniform_contents contents, uint32_t data, 564 const uint32_t *uniforms); 565const char *qir_get_stage_name(enum qstage stage); 566 567void qir_validate(struct vc4_compile *c); 568 569void qir_optimize(struct vc4_compile *c); 570bool qir_opt_algebraic(struct vc4_compile *c); 571bool qir_opt_coalesce_ff_writes(struct vc4_compile *c); 572bool qir_opt_constant_folding(struct vc4_compile *c); 573bool qir_opt_copy_propagation(struct vc4_compile *c); 574bool qir_opt_dead_code(struct vc4_compile *c); 575bool qir_opt_peephole_sf(struct vc4_compile *c); 576bool qir_opt_small_immediates(struct vc4_compile *c); 577bool qir_opt_vpm(struct vc4_compile *c); 578void vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c); 579void vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c); 580nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b, 581 nir_ssa_def **srcs, int swiz); 582void vc4_nir_lower_txf_ms(nir_shader *s, struct vc4_compile *c); 583void qir_lower_uniforms(struct vc4_compile *c); 584 585uint32_t qpu_schedule_instructions(struct vc4_compile *c); 586 587void qir_SF(struct vc4_compile *c, struct qreg src); 588 589static inline struct qreg 590qir_uniform_ui(struct vc4_compile *c, uint32_t ui) 591{ 592 return qir_uniform(c, QUNIFORM_CONSTANT, ui); 593} 594 595static inline struct qreg 596qir_uniform_f(struct vc4_compile *c, float f) 597{ 598 return qir_uniform(c, QUNIFORM_CONSTANT, fui(f)); 599} 600 601#define QIR_ALU0(name) \ 602static inline struct qreg \ 603qir_##name(struct vc4_compile *c) \ 604{ \ 605 return qir_emit_def(c, qir_inst(QOP_##name, c->undef, \ 606 c->undef, c->undef)); \ 607} \ 608static inline struct qinst * \ 609qir_##name##_dest(struct vc4_compile *c, struct qreg dest) \ 610{ \ 611 return qir_emit_nondef(c, qir_inst(QOP_##name, dest, \ 612 c->undef, c->undef)); \ 613} 614 615#define QIR_ALU1(name) \ 616static inline struct qreg \ 617qir_##name(struct vc4_compile *c, struct qreg a) \ 618{ \ 619 return qir_emit_def(c, qir_inst(QOP_##name, c->undef, \ 620 a, c->undef)); \ 621} \ 622static inline struct qinst * \ 623qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \ 624 struct qreg a) \ 625{ \ 626 return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a, \ 627 c->undef)); \ 628} 629 630#define QIR_ALU2(name) \ 631static inline struct qreg \ 632qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 633{ \ 634 return qir_emit_def(c, qir_inst(QOP_##name, c->undef, a, b)); \ 635} \ 636static inline struct qinst * \ 637qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \ 638 struct qreg a, struct qreg b) \ 639{ \ 640 return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a, b)); \ 641} 642 643#define QIR_NODST_1(name) \ 644static inline struct qinst * \ 645qir_##name(struct vc4_compile *c, struct qreg a) \ 646{ \ 647 return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef, \ 648 a, c->undef)); \ 649} 650 651#define QIR_NODST_2(name) \ 652static inline struct qinst * \ 653qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 654{ \ 655 return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef, \ 656 a, b)); \ 657} 658 659#define QIR_PAYLOAD(name) \ 660static inline struct qreg \ 661qir_##name(struct vc4_compile *c) \ 662{ \ 663 struct qreg *payload = &c->payload_##name; \ 664 if (payload->file != QFILE_NULL) \ 665 return *payload; \ 666 *payload = qir_get_temp(c); \ 667 struct qinst *inst = qir_inst(QOP_##name, *payload, \ 668 c->undef, c->undef); \ 669 struct qblock *entry = qir_entry_block(c); \ 670 list_add(&inst->link, &entry->instructions); \ 671 c->defs[payload->index] = inst; \ 672 return *payload; \ 673} 674 675QIR_ALU1(MOV) 676QIR_ALU1(FMOV) 677QIR_ALU1(MMOV) 678QIR_ALU2(FADD) 679QIR_ALU2(FSUB) 680QIR_ALU2(FMUL) 681QIR_ALU2(V8MULD) 682QIR_ALU2(V8MIN) 683QIR_ALU2(V8MAX) 684QIR_ALU2(V8ADDS) 685QIR_ALU2(V8SUBS) 686QIR_ALU2(MUL24) 687QIR_ALU2(FMIN) 688QIR_ALU2(FMAX) 689QIR_ALU2(FMINABS) 690QIR_ALU2(FMAXABS) 691QIR_ALU1(FTOI) 692QIR_ALU1(ITOF) 693 694QIR_ALU2(ADD) 695QIR_ALU2(SUB) 696QIR_ALU2(SHL) 697QIR_ALU2(SHR) 698QIR_ALU2(ASR) 699QIR_ALU2(MIN) 700QIR_ALU2(MIN_NOIMM) 701QIR_ALU2(MAX) 702QIR_ALU2(AND) 703QIR_ALU2(OR) 704QIR_ALU2(XOR) 705QIR_ALU1(NOT) 706 707QIR_ALU1(RCP) 708QIR_ALU1(RSQ) 709QIR_ALU1(EXP2) 710QIR_ALU1(LOG2) 711QIR_ALU1(VARY_ADD_C) 712QIR_PAYLOAD(FRAG_Z) 713QIR_PAYLOAD(FRAG_W) 714QIR_ALU0(TEX_RESULT) 715QIR_ALU0(TLB_COLOR_READ) 716QIR_NODST_1(MS_MASK) 717 718static inline struct qreg 719qir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1) 720{ 721 struct qreg t = qir_get_temp(c); 722 qir_MOV_dest(c, t, src1); 723 qir_MOV_dest(c, t, src0)->cond = cond; 724 return t; 725} 726 727static inline struct qreg 728qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) 729{ 730 struct qreg t = qir_FMOV(c, src); 731 c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i; 732 return t; 733} 734 735static inline struct qreg 736qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) 737{ 738 struct qreg t = qir_MOV(c, src); 739 c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i; 740 return t; 741} 742 743static inline struct qreg 744qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) 745{ 746 struct qreg t = qir_FMOV(c, src); 747 c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i; 748 return t; 749} 750 751static inline struct qreg 752qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) 753{ 754 struct qreg t = qir_MOV(c, src); 755 c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i; 756 return t; 757} 758 759static inline void 760qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan) 761{ 762 assert(!dest.pack); 763 dest.pack = QPU_PACK_MUL_8A + chan; 764 qir_emit_nondef(c, qir_inst(QOP_MMOV, dest, val, c->undef)); 765} 766 767static inline struct qreg 768qir_PACK_8888_F(struct vc4_compile *c, struct qreg val) 769{ 770 struct qreg dest = qir_MMOV(c, val); 771 c->defs[dest.index]->dst.pack = QPU_PACK_MUL_8888; 772 return dest; 773} 774 775static inline struct qreg 776qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) 777{ 778 return qir_EXP2(c, qir_FMUL(c, 779 y, 780 qir_LOG2(c, x))); 781} 782 783static inline void 784qir_VPM_WRITE(struct vc4_compile *c, struct qreg val) 785{ 786 qir_MOV_dest(c, qir_reg(QFILE_VPM, 0), val); 787} 788 789static inline struct qreg 790qir_LOAD_IMM(struct vc4_compile *c, uint32_t val) 791{ 792 return qir_emit_def(c, qir_inst(QOP_LOAD_IMM, c->undef, 793 qir_reg(QFILE_LOAD_IMM, val), c->undef)); 794} 795 796static inline struct qreg 797qir_LOAD_IMM_U2(struct vc4_compile *c, uint32_t val) 798{ 799 return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_U2, c->undef, 800 qir_reg(QFILE_LOAD_IMM, val), 801 c->undef)); 802} 803 804static inline struct qreg 805qir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val) 806{ 807 return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_I2, c->undef, 808 qir_reg(QFILE_LOAD_IMM, val), 809 c->undef)); 810} 811 812/** Shifts the multiply output to the right by rot channels */ 813static inline struct qreg 814qir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot) 815{ 816 return qir_emit_def(c, qir_inst(QOP_ROT_MUL, c->undef, 817 val, 818 qir_reg(QFILE_LOAD_IMM, 819 QPU_SMALL_IMM_MUL_ROT + rot))); 820} 821 822static inline struct qinst * 823qir_MOV_cond(struct vc4_compile *c, uint8_t cond, 824 struct qreg dest, struct qreg src) 825{ 826 struct qinst *mov = qir_MOV_dest(c, dest, src); 827 mov->cond = cond; 828 return mov; 829} 830 831static inline struct qinst * 832qir_BRANCH(struct vc4_compile *c, uint8_t cond) 833{ 834 struct qinst *inst = qir_inst(QOP_BRANCH, c->undef, c->undef, c->undef); 835 inst->cond = cond; 836 qir_emit_nondef(c, inst); 837 return inst; 838} 839 840#define qir_for_each_block(block, c) \ 841 list_for_each_entry(struct qblock, block, &c->blocks, link) 842 843#define qir_for_each_block_rev(block, c) \ 844 list_for_each_entry_rev(struct qblock, block, &c->blocks, link) 845 846/* Loop over the non-NULL members of the successors array. */ 847#define qir_for_each_successor(succ, block) \ 848 for (struct qblock *succ = block->successors[0]; \ 849 succ != NULL; \ 850 succ = (succ == block->successors[1] ? NULL : \ 851 block->successors[1])) 852 853#define qir_for_each_inst(inst, block) \ 854 list_for_each_entry(struct qinst, inst, &block->instructions, link) 855 856#define qir_for_each_inst_rev(inst, block) \ 857 list_for_each_entry_rev(struct qinst, inst, &block->instructions, link) 858 859#define qir_for_each_inst_safe(inst, block) \ 860 list_for_each_entry_safe(struct qinst, inst, &block->instructions, link) 861 862#define qir_for_each_inst_inorder(inst, c) \ 863 qir_for_each_block(_block, c) \ 864 qir_for_each_inst_safe(inst, _block) 865 866#endif /* VC4_QIR_H */ 867