1848b8605Smrg/* 2848b8605Smrg * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3848b8605Smrg * 4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5848b8605Smrg * copy of this software and associated documentation files (the "Software"), 6848b8605Smrg * to deal in the Software without restriction, including without limitation 7848b8605Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 8848b8605Smrg * license, and/or sell copies of the Software, and to permit persons to whom 9848b8605Smrg * the Software is furnished to do so, subject to the following conditions: 10848b8605Smrg * 11848b8605Smrg * The above copyright notice and this permission notice (including the next 12848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 13848b8605Smrg * Software. 14848b8605Smrg * 15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18848b8605Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19848b8605Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20848b8605Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21848b8605Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 22848b8605Smrg */ 23848b8605Smrg#include "r600_sq.h" 24848b8605Smrg#include "r600_opcodes.h" 25848b8605Smrg#include "r600_formats.h" 26848b8605Smrg#include "r600_shader.h" 27848b8605Smrg#include "r600d.h" 28848b8605Smrg 29848b8605Smrg#include <errno.h> 30b8e80941Smrg#include "util/u_bitcast.h" 31848b8605Smrg#include "util/u_dump.h" 32848b8605Smrg#include "util/u_memory.h" 33848b8605Smrg#include "util/u_math.h" 34848b8605Smrg#include "pipe/p_shader_tokens.h" 35848b8605Smrg 36848b8605Smrg#include "sb/sb_public.h" 37848b8605Smrg 38848b8605Smrg#define NUM_OF_CYCLES 3 39848b8605Smrg#define NUM_OF_COMPONENTS 4 40848b8605Smrg 41b8e80941Smrgstatic inline bool alu_writes(struct r600_bytecode_alu *alu) 42848b8605Smrg{ 43b8e80941Smrg return alu->dst.write || alu->is_op3; 44848b8605Smrg} 45848b8605Smrg 46b8e80941Smrgstatic inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu) 47b8e80941Smrg{ 48b8e80941Smrg return r600_isa_alu(alu->op)->src_count; 49b8e80941Smrg} 50848b8605Smrg 51848b8605Smrgstatic struct r600_bytecode_cf *r600_bytecode_cf(void) 52848b8605Smrg{ 53848b8605Smrg struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf); 54848b8605Smrg 55b8e80941Smrg if (!cf) 56848b8605Smrg return NULL; 57848b8605Smrg LIST_INITHEAD(&cf->list); 58848b8605Smrg LIST_INITHEAD(&cf->alu); 59848b8605Smrg LIST_INITHEAD(&cf->vtx); 60848b8605Smrg LIST_INITHEAD(&cf->tex); 61b8e80941Smrg LIST_INITHEAD(&cf->gds); 62848b8605Smrg return cf; 63848b8605Smrg} 64848b8605Smrg 65848b8605Smrgstatic struct r600_bytecode_alu *r600_bytecode_alu(void) 66848b8605Smrg{ 67848b8605Smrg struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu); 68848b8605Smrg 69b8e80941Smrg if (!alu) 70848b8605Smrg return NULL; 71848b8605Smrg LIST_INITHEAD(&alu->list); 72848b8605Smrg return alu; 73848b8605Smrg} 74848b8605Smrg 75848b8605Smrgstatic struct r600_bytecode_vtx *r600_bytecode_vtx(void) 76848b8605Smrg{ 77848b8605Smrg struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx); 78848b8605Smrg 79b8e80941Smrg if (!vtx) 80848b8605Smrg return NULL; 81848b8605Smrg LIST_INITHEAD(&vtx->list); 82848b8605Smrg return vtx; 83848b8605Smrg} 84848b8605Smrg 85848b8605Smrgstatic struct r600_bytecode_tex *r600_bytecode_tex(void) 86848b8605Smrg{ 87848b8605Smrg struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex); 88848b8605Smrg 89b8e80941Smrg if (!tex) 90848b8605Smrg return NULL; 91848b8605Smrg LIST_INITHEAD(&tex->list); 92848b8605Smrg return tex; 93848b8605Smrg} 94848b8605Smrg 95b8e80941Smrgstatic struct r600_bytecode_gds *r600_bytecode_gds(void) 96b8e80941Smrg{ 97b8e80941Smrg struct r600_bytecode_gds *gds = CALLOC_STRUCT(r600_bytecode_gds); 98b8e80941Smrg 99b8e80941Smrg if (gds == NULL) 100b8e80941Smrg return NULL; 101b8e80941Smrg LIST_INITHEAD(&gds->list); 102b8e80941Smrg return gds; 103b8e80941Smrg} 104b8e80941Smrg 105848b8605Smrgstatic unsigned stack_entry_size(enum radeon_family chip) { 106848b8605Smrg /* Wavefront size: 107848b8605Smrg * 64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/ 108848b8605Smrg * Aruba/Sumo/Sumo2/redwood/juniper 109848b8605Smrg * 32: R630/R730/R710/Palm/Cedar 110848b8605Smrg * 16: R610/Rs780 111848b8605Smrg * 112848b8605Smrg * Stack row size: 113848b8605Smrg * Wavefront Size 16 32 48 64 114848b8605Smrg * Columns per Row (R6xx/R7xx/R8xx only) 8 8 4 4 115848b8605Smrg * Columns per Row (R9xx+) 8 4 4 4 */ 116848b8605Smrg 117848b8605Smrg switch (chip) { 118848b8605Smrg /* FIXME: are some chips missing here? */ 119848b8605Smrg /* wavefront size 16 */ 120848b8605Smrg case CHIP_RV610: 121848b8605Smrg case CHIP_RS780: 122848b8605Smrg case CHIP_RV620: 123848b8605Smrg case CHIP_RS880: 124848b8605Smrg /* wavefront size 32 */ 125848b8605Smrg case CHIP_RV630: 126848b8605Smrg case CHIP_RV635: 127848b8605Smrg case CHIP_RV730: 128848b8605Smrg case CHIP_RV710: 129848b8605Smrg case CHIP_PALM: 130848b8605Smrg case CHIP_CEDAR: 131848b8605Smrg return 8; 132848b8605Smrg 133848b8605Smrg /* wavefront size 64 */ 134848b8605Smrg default: 135848b8605Smrg return 4; 136848b8605Smrg } 137848b8605Smrg} 138848b8605Smrg 139848b8605Smrgvoid r600_bytecode_init(struct r600_bytecode *bc, 140848b8605Smrg enum chip_class chip_class, 141848b8605Smrg enum radeon_family family, 142848b8605Smrg bool has_compressed_msaa_texturing) 143848b8605Smrg{ 144848b8605Smrg static unsigned next_shader_id = 0; 145848b8605Smrg 146848b8605Smrg bc->debug_id = ++next_shader_id; 147848b8605Smrg 148848b8605Smrg if ((chip_class == R600) && 149848b8605Smrg (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) { 150848b8605Smrg bc->ar_handling = AR_HANDLE_RV6XX; 151848b8605Smrg bc->r6xx_nop_after_rel_dst = 1; 152848b8605Smrg } else { 153848b8605Smrg bc->ar_handling = AR_HANDLE_NORMAL; 154848b8605Smrg bc->r6xx_nop_after_rel_dst = 0; 155848b8605Smrg } 156848b8605Smrg 157848b8605Smrg LIST_INITHEAD(&bc->cf); 158848b8605Smrg bc->chip_class = chip_class; 159848b8605Smrg bc->family = family; 160848b8605Smrg bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing; 161848b8605Smrg bc->stack.entry_size = stack_entry_size(family); 162848b8605Smrg} 163848b8605Smrg 164848b8605Smrgint r600_bytecode_add_cf(struct r600_bytecode *bc) 165848b8605Smrg{ 166848b8605Smrg struct r600_bytecode_cf *cf = r600_bytecode_cf(); 167848b8605Smrg 168b8e80941Smrg if (!cf) 169848b8605Smrg return -ENOMEM; 170848b8605Smrg LIST_ADDTAIL(&cf->list, &bc->cf); 171848b8605Smrg if (bc->cf_last) { 172848b8605Smrg cf->id = bc->cf_last->id + 2; 173848b8605Smrg if (bc->cf_last->eg_alu_extended) { 174848b8605Smrg /* take into account extended alu size */ 175848b8605Smrg cf->id += 2; 176848b8605Smrg bc->ndw += 2; 177848b8605Smrg } 178848b8605Smrg } 179848b8605Smrg bc->cf_last = cf; 180848b8605Smrg bc->ncf++; 181848b8605Smrg bc->ndw += 2; 182848b8605Smrg bc->force_add_cf = 0; 183848b8605Smrg bc->ar_loaded = 0; 184848b8605Smrg return 0; 185848b8605Smrg} 186848b8605Smrg 187848b8605Smrgint r600_bytecode_add_output(struct r600_bytecode *bc, 188848b8605Smrg const struct r600_bytecode_output *output) 189848b8605Smrg{ 190848b8605Smrg int r; 191848b8605Smrg 192848b8605Smrg if (output->gpr >= bc->ngpr) 193848b8605Smrg bc->ngpr = output->gpr + 1; 194848b8605Smrg 195848b8605Smrg if (bc->cf_last && (bc->cf_last->op == output->op || 196848b8605Smrg (bc->cf_last->op == CF_OP_EXPORT && 197848b8605Smrg output->op == CF_OP_EXPORT_DONE)) && 198848b8605Smrg output->type == bc->cf_last->output.type && 199848b8605Smrg output->elem_size == bc->cf_last->output.elem_size && 200848b8605Smrg output->swizzle_x == bc->cf_last->output.swizzle_x && 201848b8605Smrg output->swizzle_y == bc->cf_last->output.swizzle_y && 202848b8605Smrg output->swizzle_z == bc->cf_last->output.swizzle_z && 203848b8605Smrg output->swizzle_w == bc->cf_last->output.swizzle_w && 204848b8605Smrg output->comp_mask == bc->cf_last->output.comp_mask && 205848b8605Smrg (output->burst_count + bc->cf_last->output.burst_count) <= 16) { 206848b8605Smrg 207848b8605Smrg if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && 208848b8605Smrg (output->array_base + output->burst_count) == bc->cf_last->output.array_base) { 209848b8605Smrg 210848b8605Smrg bc->cf_last->op = bc->cf_last->output.op = output->op; 211848b8605Smrg bc->cf_last->output.gpr = output->gpr; 212848b8605Smrg bc->cf_last->output.array_base = output->array_base; 213848b8605Smrg bc->cf_last->output.burst_count += output->burst_count; 214848b8605Smrg return 0; 215848b8605Smrg 216848b8605Smrg } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) && 217848b8605Smrg output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) { 218848b8605Smrg 219848b8605Smrg bc->cf_last->op = bc->cf_last->output.op = output->op; 220848b8605Smrg bc->cf_last->output.burst_count += output->burst_count; 221848b8605Smrg return 0; 222848b8605Smrg } 223848b8605Smrg } 224848b8605Smrg 225848b8605Smrg r = r600_bytecode_add_cf(bc); 226848b8605Smrg if (r) 227848b8605Smrg return r; 228848b8605Smrg bc->cf_last->op = output->op; 229848b8605Smrg memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output)); 230848b8605Smrg bc->cf_last->barrier = 1; 231848b8605Smrg return 0; 232848b8605Smrg} 233848b8605Smrg 234b8e80941Smrgint r600_bytecode_add_pending_output(struct r600_bytecode *bc, 235b8e80941Smrg const struct r600_bytecode_output *output) 236b8e80941Smrg{ 237b8e80941Smrg assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs)); 238b8e80941Smrg bc->pending_outputs[bc->n_pending_outputs++] = *output; 239b8e80941Smrg 240b8e80941Smrg return 0; 241b8e80941Smrg} 242b8e80941Smrg 243b8e80941Smrgvoid r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean need_wait_ack) 244b8e80941Smrg{ 245b8e80941Smrg bc->need_wait_ack = need_wait_ack; 246b8e80941Smrg} 247b8e80941Smrg 248b8e80941Smrgboolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc) 249b8e80941Smrg{ 250b8e80941Smrg return bc->need_wait_ack; 251b8e80941Smrg} 252b8e80941Smrg 253848b8605Smrg/* alu instructions that can ony exits once per group */ 254b8e80941Smrgstatic int is_alu_once_inst(struct r600_bytecode_alu *alu) 255848b8605Smrg{ 256b8e80941Smrg return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER; 257848b8605Smrg} 258848b8605Smrg 259848b8605Smrgstatic int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 260848b8605Smrg{ 261848b8605Smrg return (r600_isa_alu(alu->op)->flags & AF_REPL) && 262848b8605Smrg (r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V); 263848b8605Smrg} 264848b8605Smrg 265b8e80941Smrgstatic int is_alu_mova_inst(struct r600_bytecode_alu *alu) 266848b8605Smrg{ 267848b8605Smrg return r600_isa_alu(alu->op)->flags & AF_MOVA; 268848b8605Smrg} 269848b8605Smrg 270b8e80941Smrgstatic int alu_uses_rel(struct r600_bytecode_alu *alu) 271848b8605Smrg{ 272b8e80941Smrg unsigned num_src = r600_bytecode_get_num_operands(alu); 273848b8605Smrg unsigned src; 274848b8605Smrg 275848b8605Smrg if (alu->dst.rel) { 276848b8605Smrg return 1; 277848b8605Smrg } 278848b8605Smrg 279848b8605Smrg for (src = 0; src < num_src; ++src) { 280848b8605Smrg if (alu->src[src].rel) { 281848b8605Smrg return 1; 282848b8605Smrg } 283848b8605Smrg } 284848b8605Smrg return 0; 285848b8605Smrg} 286848b8605Smrg 287b8e80941Smrgstatic int is_lds_read(int sel) 288b8e80941Smrg{ 289b8e80941Smrg return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP; 290b8e80941Smrg} 291b8e80941Smrg 292b8e80941Smrgstatic int alu_uses_lds(struct r600_bytecode_alu *alu) 293b8e80941Smrg{ 294b8e80941Smrg unsigned num_src = r600_bytecode_get_num_operands(alu); 295b8e80941Smrg unsigned src; 296b8e80941Smrg 297b8e80941Smrg for (src = 0; src < num_src; ++src) { 298b8e80941Smrg if (is_lds_read(alu->src[src].sel)) { 299b8e80941Smrg return 1; 300b8e80941Smrg } 301b8e80941Smrg } 302b8e80941Smrg return 0; 303b8e80941Smrg} 304b8e80941Smrg 305b8e80941Smrgstatic int is_alu_64bit_inst(struct r600_bytecode_alu *alu) 306b8e80941Smrg{ 307b8e80941Smrg const struct alu_op_info *op = r600_isa_alu(alu->op); 308b8e80941Smrg return (op->flags & AF_64); 309b8e80941Smrg} 310b8e80941Smrg 311848b8605Smrgstatic int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 312848b8605Smrg{ 313848b8605Smrg unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op); 314848b8605Smrg return !(slots & AF_S); 315848b8605Smrg} 316848b8605Smrg 317848b8605Smrgstatic int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 318848b8605Smrg{ 319848b8605Smrg unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op); 320848b8605Smrg return !(slots & AF_V); 321848b8605Smrg} 322848b8605Smrg 323848b8605Smrg/* alu instructions that can execute on any unit */ 324848b8605Smrgstatic int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 325848b8605Smrg{ 326848b8605Smrg unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op); 327848b8605Smrg return slots == AF_VS; 328848b8605Smrg} 329848b8605Smrg 330b8e80941Smrgstatic int is_nop_inst(struct r600_bytecode_alu *alu) 331848b8605Smrg{ 332848b8605Smrg return alu->op == ALU_OP0_NOP; 333b8e80941Smrg} 334848b8605Smrg 335848b8605Smrgstatic int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first, 336848b8605Smrg struct r600_bytecode_alu *assignment[5]) 337848b8605Smrg{ 338848b8605Smrg struct r600_bytecode_alu *alu; 339848b8605Smrg unsigned i, chan, trans; 340848b8605Smrg int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 341848b8605Smrg 342848b8605Smrg for (i = 0; i < max_slots; i++) 343848b8605Smrg assignment[i] = NULL; 344848b8605Smrg 345848b8605Smrg for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) { 346848b8605Smrg chan = alu->dst.chan; 347848b8605Smrg if (max_slots == 4) 348848b8605Smrg trans = 0; 349848b8605Smrg else if (is_alu_trans_unit_inst(bc, alu)) 350848b8605Smrg trans = 1; 351848b8605Smrg else if (is_alu_vec_unit_inst(bc, alu)) 352848b8605Smrg trans = 0; 353848b8605Smrg else if (assignment[chan]) 354848b8605Smrg trans = 1; /* Assume ALU_INST_PREFER_VECTOR. */ 355848b8605Smrg else 356848b8605Smrg trans = 0; 357848b8605Smrg 358848b8605Smrg if (trans) { 359848b8605Smrg if (assignment[4]) { 360848b8605Smrg assert(0); /* ALU.Trans has already been allocated. */ 361848b8605Smrg return -1; 362848b8605Smrg } 363848b8605Smrg assignment[4] = alu; 364848b8605Smrg } else { 365848b8605Smrg if (assignment[chan]) { 366848b8605Smrg assert(0); /* ALU.chan has already been allocated. */ 367848b8605Smrg return -1; 368848b8605Smrg } 369848b8605Smrg assignment[chan] = alu; 370848b8605Smrg } 371848b8605Smrg 372848b8605Smrg if (alu->last) 373848b8605Smrg break; 374848b8605Smrg } 375848b8605Smrg return 0; 376848b8605Smrg} 377848b8605Smrg 378848b8605Smrgstruct alu_bank_swizzle { 379848b8605Smrg int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; 380848b8605Smrg int hw_cfile_addr[4]; 381848b8605Smrg int hw_cfile_elem[4]; 382848b8605Smrg}; 383848b8605Smrg 384848b8605Smrgstatic const unsigned cycle_for_bank_swizzle_vec[][3] = { 385848b8605Smrg [SQ_ALU_VEC_012] = { 0, 1, 2 }, 386848b8605Smrg [SQ_ALU_VEC_021] = { 0, 2, 1 }, 387848b8605Smrg [SQ_ALU_VEC_120] = { 1, 2, 0 }, 388848b8605Smrg [SQ_ALU_VEC_102] = { 1, 0, 2 }, 389848b8605Smrg [SQ_ALU_VEC_201] = { 2, 0, 1 }, 390848b8605Smrg [SQ_ALU_VEC_210] = { 2, 1, 0 } 391848b8605Smrg}; 392848b8605Smrg 393848b8605Smrgstatic const unsigned cycle_for_bank_swizzle_scl[][3] = { 394848b8605Smrg [SQ_ALU_SCL_210] = { 2, 1, 0 }, 395848b8605Smrg [SQ_ALU_SCL_122] = { 1, 2, 2 }, 396848b8605Smrg [SQ_ALU_SCL_212] = { 2, 1, 2 }, 397848b8605Smrg [SQ_ALU_SCL_221] = { 2, 2, 1 } 398848b8605Smrg}; 399848b8605Smrg 400848b8605Smrgstatic void init_bank_swizzle(struct alu_bank_swizzle *bs) 401848b8605Smrg{ 402848b8605Smrg int i, cycle, component; 403848b8605Smrg /* set up gpr use */ 404848b8605Smrg for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++) 405848b8605Smrg for (component = 0; component < NUM_OF_COMPONENTS; component++) 406848b8605Smrg bs->hw_gpr[cycle][component] = -1; 407848b8605Smrg for (i = 0; i < 4; i++) 408848b8605Smrg bs->hw_cfile_addr[i] = -1; 409848b8605Smrg for (i = 0; i < 4; i++) 410848b8605Smrg bs->hw_cfile_elem[i] = -1; 411848b8605Smrg} 412848b8605Smrg 413848b8605Smrgstatic int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle) 414848b8605Smrg{ 415848b8605Smrg if (bs->hw_gpr[cycle][chan] == -1) 416848b8605Smrg bs->hw_gpr[cycle][chan] = sel; 417848b8605Smrg else if (bs->hw_gpr[cycle][chan] != (int)sel) { 418848b8605Smrg /* Another scalar operation has already used the GPR read port for the channel. */ 419848b8605Smrg return -1; 420848b8605Smrg } 421848b8605Smrg return 0; 422848b8605Smrg} 423848b8605Smrg 424b8e80941Smrgstatic int reserve_cfile(const struct r600_bytecode *bc, 425b8e80941Smrg struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) 426848b8605Smrg{ 427848b8605Smrg int res, num_res = 4; 428848b8605Smrg if (bc->chip_class >= R700) { 429848b8605Smrg num_res = 2; 430848b8605Smrg chan /= 2; 431848b8605Smrg } 432848b8605Smrg for (res = 0; res < num_res; ++res) { 433848b8605Smrg if (bs->hw_cfile_addr[res] == -1) { 434848b8605Smrg bs->hw_cfile_addr[res] = sel; 435848b8605Smrg bs->hw_cfile_elem[res] = chan; 436848b8605Smrg return 0; 437848b8605Smrg } else if (bs->hw_cfile_addr[res] == sel && 438848b8605Smrg bs->hw_cfile_elem[res] == chan) 439848b8605Smrg return 0; /* Read for this scalar element already reserved, nothing to do here. */ 440848b8605Smrg } 441848b8605Smrg /* All cfile read ports are used, cannot reference vector element. */ 442848b8605Smrg return -1; 443848b8605Smrg} 444848b8605Smrg 445848b8605Smrgstatic int is_gpr(unsigned sel) 446848b8605Smrg{ 447848b8605Smrg return (sel <= 127); 448848b8605Smrg} 449848b8605Smrg 450848b8605Smrg/* CB constants start at 512, and get translated to a kcache index when ALU 451848b8605Smrg * clauses are constructed. Note that we handle kcache constants the same way 452848b8605Smrg * as (the now gone) cfile constants, is that really required? */ 453848b8605Smrgstatic int is_cfile(unsigned sel) 454848b8605Smrg{ 455848b8605Smrg return (sel > 255 && sel < 512) || 456848b8605Smrg (sel > 511 && sel < 4607) || /* Kcache before translation. */ 457848b8605Smrg (sel > 127 && sel < 192); /* Kcache after translation. */ 458848b8605Smrg} 459848b8605Smrg 460848b8605Smrgstatic int is_const(int sel) 461848b8605Smrg{ 462848b8605Smrg return is_cfile(sel) || 463848b8605Smrg (sel >= V_SQ_ALU_SRC_0 && 464848b8605Smrg sel <= V_SQ_ALU_SRC_LITERAL); 465848b8605Smrg} 466848b8605Smrg 467b8e80941Smrgstatic int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, 468848b8605Smrg struct alu_bank_swizzle *bs, int bank_swizzle) 469848b8605Smrg{ 470848b8605Smrg int r, src, num_src, sel, elem, cycle; 471848b8605Smrg 472b8e80941Smrg num_src = r600_bytecode_get_num_operands(alu); 473848b8605Smrg for (src = 0; src < num_src; src++) { 474848b8605Smrg sel = alu->src[src].sel; 475848b8605Smrg elem = alu->src[src].chan; 476848b8605Smrg if (is_gpr(sel)) { 477848b8605Smrg cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src]; 478848b8605Smrg if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan) 479848b8605Smrg /* Nothing to do; special-case optimization, 480848b8605Smrg * second source uses first source’s reservation. */ 481848b8605Smrg continue; 482848b8605Smrg else { 483848b8605Smrg r = reserve_gpr(bs, sel, elem, cycle); 484848b8605Smrg if (r) 485848b8605Smrg return r; 486848b8605Smrg } 487848b8605Smrg } else if (is_cfile(sel)) { 488848b8605Smrg r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem); 489848b8605Smrg if (r) 490848b8605Smrg return r; 491848b8605Smrg } 492848b8605Smrg /* No restrictions on PV, PS, literal or special constants. */ 493848b8605Smrg } 494848b8605Smrg return 0; 495848b8605Smrg} 496848b8605Smrg 497b8e80941Smrgstatic int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, 498848b8605Smrg struct alu_bank_swizzle *bs, int bank_swizzle) 499848b8605Smrg{ 500848b8605Smrg int r, src, num_src, const_count, sel, elem, cycle; 501848b8605Smrg 502b8e80941Smrg num_src = r600_bytecode_get_num_operands(alu); 503848b8605Smrg for (const_count = 0, src = 0; src < num_src; ++src) { 504848b8605Smrg sel = alu->src[src].sel; 505848b8605Smrg elem = alu->src[src].chan; 506848b8605Smrg if (is_const(sel)) { /* Any constant, including literal and inline constants. */ 507848b8605Smrg if (const_count >= 2) 508848b8605Smrg /* More than two references to a constant in 509848b8605Smrg * transcendental operation. */ 510848b8605Smrg return -1; 511848b8605Smrg else 512848b8605Smrg const_count++; 513848b8605Smrg } 514848b8605Smrg if (is_cfile(sel)) { 515848b8605Smrg r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem); 516848b8605Smrg if (r) 517848b8605Smrg return r; 518848b8605Smrg } 519848b8605Smrg } 520848b8605Smrg for (src = 0; src < num_src; ++src) { 521848b8605Smrg sel = alu->src[src].sel; 522848b8605Smrg elem = alu->src[src].chan; 523848b8605Smrg if (is_gpr(sel)) { 524848b8605Smrg cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; 525848b8605Smrg if (cycle < const_count) 526848b8605Smrg /* Cycle for GPR load conflicts with 527848b8605Smrg * constant load in transcendental operation. */ 528848b8605Smrg return -1; 529848b8605Smrg r = reserve_gpr(bs, sel, elem, cycle); 530848b8605Smrg if (r) 531848b8605Smrg return r; 532848b8605Smrg } 533848b8605Smrg /* PV PS restrictions */ 534848b8605Smrg if (const_count && (sel == 254 || sel == 255)) { 535848b8605Smrg cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; 536848b8605Smrg if (cycle < const_count) 537848b8605Smrg return -1; 538848b8605Smrg } 539848b8605Smrg } 540848b8605Smrg return 0; 541848b8605Smrg} 542848b8605Smrg 543b8e80941Smrgstatic int check_and_set_bank_swizzle(const struct r600_bytecode *bc, 544848b8605Smrg struct r600_bytecode_alu *slots[5]) 545848b8605Smrg{ 546848b8605Smrg struct alu_bank_swizzle bs; 547848b8605Smrg int bank_swizzle[5]; 548848b8605Smrg int i, r = 0, forced = 1; 549848b8605Smrg boolean scalar_only = bc->chip_class == CAYMAN ? false : true; 550848b8605Smrg int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 551848b8605Smrg 552848b8605Smrg for (i = 0; i < max_slots; i++) { 553848b8605Smrg if (slots[i]) { 554848b8605Smrg if (slots[i]->bank_swizzle_force) { 555848b8605Smrg slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; 556848b8605Smrg } else { 557848b8605Smrg forced = 0; 558848b8605Smrg } 559848b8605Smrg } 560848b8605Smrg 561848b8605Smrg if (i < 4 && slots[i]) 562848b8605Smrg scalar_only = false; 563848b8605Smrg } 564848b8605Smrg if (forced) 565848b8605Smrg return 0; 566848b8605Smrg 567848b8605Smrg /* Just check every possible combination of bank swizzle. 568848b8605Smrg * Not very efficent, but works on the first try in most of the cases. */ 569848b8605Smrg for (i = 0; i < 4; i++) 570848b8605Smrg if (!slots[i] || !slots[i]->bank_swizzle_force) 571848b8605Smrg bank_swizzle[i] = SQ_ALU_VEC_012; 572848b8605Smrg else 573848b8605Smrg bank_swizzle[i] = slots[i]->bank_swizzle; 574848b8605Smrg 575848b8605Smrg bank_swizzle[4] = SQ_ALU_SCL_210; 576848b8605Smrg while(bank_swizzle[4] <= SQ_ALU_SCL_221) { 577848b8605Smrg 578848b8605Smrg init_bank_swizzle(&bs); 579848b8605Smrg if (scalar_only == false) { 580848b8605Smrg for (i = 0; i < 4; i++) { 581848b8605Smrg if (slots[i]) { 582848b8605Smrg r = check_vector(bc, slots[i], &bs, bank_swizzle[i]); 583848b8605Smrg if (r) 584848b8605Smrg break; 585848b8605Smrg } 586848b8605Smrg } 587848b8605Smrg } else 588848b8605Smrg r = 0; 589848b8605Smrg 590848b8605Smrg if (!r && max_slots == 5 && slots[4]) { 591848b8605Smrg r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]); 592848b8605Smrg } 593848b8605Smrg if (!r) { 594848b8605Smrg for (i = 0; i < max_slots; i++) { 595848b8605Smrg if (slots[i]) 596848b8605Smrg slots[i]->bank_swizzle = bank_swizzle[i]; 597848b8605Smrg } 598848b8605Smrg return 0; 599848b8605Smrg } 600848b8605Smrg 601848b8605Smrg if (scalar_only) { 602848b8605Smrg bank_swizzle[4]++; 603848b8605Smrg } else { 604848b8605Smrg for (i = 0; i < max_slots; i++) { 605848b8605Smrg if (!slots[i] || !slots[i]->bank_swizzle_force) { 606848b8605Smrg bank_swizzle[i]++; 607848b8605Smrg if (bank_swizzle[i] <= SQ_ALU_VEC_210) 608848b8605Smrg break; 609848b8605Smrg else if (i < max_slots - 1) 610848b8605Smrg bank_swizzle[i] = SQ_ALU_VEC_012; 611848b8605Smrg else 612848b8605Smrg return -1; 613848b8605Smrg } 614848b8605Smrg } 615848b8605Smrg } 616848b8605Smrg } 617848b8605Smrg 618848b8605Smrg /* Couldn't find a working swizzle. */ 619848b8605Smrg return -1; 620848b8605Smrg} 621848b8605Smrg 622848b8605Smrgstatic int replace_gpr_with_pv_ps(struct r600_bytecode *bc, 623848b8605Smrg struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev) 624848b8605Smrg{ 625848b8605Smrg struct r600_bytecode_alu *prev[5]; 626848b8605Smrg int gpr[5], chan[5]; 627848b8605Smrg int i, j, r, src, num_src; 628848b8605Smrg int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 629848b8605Smrg 630848b8605Smrg r = assign_alu_units(bc, alu_prev, prev); 631848b8605Smrg if (r) 632848b8605Smrg return r; 633848b8605Smrg 634848b8605Smrg for (i = 0; i < max_slots; ++i) { 635b8e80941Smrg if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) { 636b8e80941Smrg 637b8e80941Smrg if (is_alu_64bit_inst(prev[i])) { 638b8e80941Smrg gpr[i] = -1; 639b8e80941Smrg continue; 640b8e80941Smrg } 641b8e80941Smrg 642848b8605Smrg gpr[i] = prev[i]->dst.sel; 643848b8605Smrg /* cube writes more than PV.X */ 644848b8605Smrg if (is_alu_reduction_inst(bc, prev[i])) 645848b8605Smrg chan[i] = 0; 646848b8605Smrg else 647848b8605Smrg chan[i] = prev[i]->dst.chan; 648848b8605Smrg } else 649848b8605Smrg gpr[i] = -1; 650848b8605Smrg } 651848b8605Smrg 652848b8605Smrg for (i = 0; i < max_slots; ++i) { 653848b8605Smrg struct r600_bytecode_alu *alu = slots[i]; 654b8e80941Smrg if (!alu) 655848b8605Smrg continue; 656848b8605Smrg 657b8e80941Smrg if (is_alu_64bit_inst(alu)) 658b8e80941Smrg continue; 659b8e80941Smrg num_src = r600_bytecode_get_num_operands(alu); 660848b8605Smrg for (src = 0; src < num_src; ++src) { 661848b8605Smrg if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) 662848b8605Smrg continue; 663848b8605Smrg 664848b8605Smrg if (bc->chip_class < CAYMAN) { 665848b8605Smrg if (alu->src[src].sel == gpr[4] && 666848b8605Smrg alu->src[src].chan == chan[4] && 667848b8605Smrg alu_prev->pred_sel == alu->pred_sel) { 668848b8605Smrg alu->src[src].sel = V_SQ_ALU_SRC_PS; 669848b8605Smrg alu->src[src].chan = 0; 670848b8605Smrg continue; 671848b8605Smrg } 672848b8605Smrg } 673848b8605Smrg 674848b8605Smrg for (j = 0; j < 4; ++j) { 675848b8605Smrg if (alu->src[src].sel == gpr[j] && 676848b8605Smrg alu->src[src].chan == j && 677848b8605Smrg alu_prev->pred_sel == alu->pred_sel) { 678848b8605Smrg alu->src[src].sel = V_SQ_ALU_SRC_PV; 679848b8605Smrg alu->src[src].chan = chan[j]; 680848b8605Smrg break; 681848b8605Smrg } 682848b8605Smrg } 683848b8605Smrg } 684848b8605Smrg } 685848b8605Smrg 686848b8605Smrg return 0; 687848b8605Smrg} 688848b8605Smrg 689b8e80941Smrgvoid r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs) 690848b8605Smrg{ 691848b8605Smrg switch(value) { 692848b8605Smrg case 0: 693848b8605Smrg *sel = V_SQ_ALU_SRC_0; 694848b8605Smrg break; 695848b8605Smrg case 1: 696848b8605Smrg *sel = V_SQ_ALU_SRC_1_INT; 697848b8605Smrg break; 698848b8605Smrg case -1: 699848b8605Smrg *sel = V_SQ_ALU_SRC_M_1_INT; 700848b8605Smrg break; 701848b8605Smrg case 0x3F800000: /* 1.0f */ 702848b8605Smrg *sel = V_SQ_ALU_SRC_1; 703848b8605Smrg break; 704848b8605Smrg case 0x3F000000: /* 0.5f */ 705848b8605Smrg *sel = V_SQ_ALU_SRC_0_5; 706848b8605Smrg break; 707848b8605Smrg case 0xBF800000: /* -1.0f */ 708848b8605Smrg *sel = V_SQ_ALU_SRC_1; 709b8e80941Smrg *neg ^= !abs; 710848b8605Smrg break; 711848b8605Smrg case 0xBF000000: /* -0.5f */ 712848b8605Smrg *sel = V_SQ_ALU_SRC_0_5; 713b8e80941Smrg *neg ^= !abs; 714848b8605Smrg break; 715848b8605Smrg default: 716848b8605Smrg *sel = V_SQ_ALU_SRC_LITERAL; 717848b8605Smrg break; 718848b8605Smrg } 719848b8605Smrg} 720848b8605Smrg 721848b8605Smrg/* compute how many literal are needed */ 722b8e80941Smrgstatic int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu, 723848b8605Smrg uint32_t literal[4], unsigned *nliteral) 724848b8605Smrg{ 725b8e80941Smrg unsigned num_src = r600_bytecode_get_num_operands(alu); 726848b8605Smrg unsigned i, j; 727848b8605Smrg 728848b8605Smrg for (i = 0; i < num_src; ++i) { 729848b8605Smrg if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 730848b8605Smrg uint32_t value = alu->src[i].value; 731848b8605Smrg unsigned found = 0; 732848b8605Smrg for (j = 0; j < *nliteral; ++j) { 733848b8605Smrg if (literal[j] == value) { 734848b8605Smrg found = 1; 735848b8605Smrg break; 736848b8605Smrg } 737848b8605Smrg } 738848b8605Smrg if (!found) { 739848b8605Smrg if (*nliteral >= 4) 740848b8605Smrg return -EINVAL; 741848b8605Smrg literal[(*nliteral)++] = value; 742848b8605Smrg } 743848b8605Smrg } 744848b8605Smrg } 745848b8605Smrg return 0; 746848b8605Smrg} 747848b8605Smrg 748b8e80941Smrgstatic void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu, 749b8e80941Smrg uint32_t literal[4], unsigned nliteral) 750848b8605Smrg{ 751b8e80941Smrg unsigned num_src = r600_bytecode_get_num_operands(alu); 752848b8605Smrg unsigned i, j; 753848b8605Smrg 754848b8605Smrg for (i = 0; i < num_src; ++i) { 755848b8605Smrg if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 756848b8605Smrg uint32_t value = alu->src[i].value; 757848b8605Smrg for (j = 0; j < nliteral; ++j) { 758848b8605Smrg if (literal[j] == value) { 759848b8605Smrg alu->src[i].chan = j; 760848b8605Smrg break; 761848b8605Smrg } 762848b8605Smrg } 763848b8605Smrg } 764848b8605Smrg } 765848b8605Smrg} 766848b8605Smrg 767848b8605Smrgstatic int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5], 768848b8605Smrg struct r600_bytecode_alu *alu_prev) 769848b8605Smrg{ 770848b8605Smrg struct r600_bytecode_alu *prev[5]; 771848b8605Smrg struct r600_bytecode_alu *result[5] = { NULL }; 772848b8605Smrg 773848b8605Smrg uint32_t literal[4], prev_literal[4]; 774848b8605Smrg unsigned nliteral = 0, prev_nliteral = 0; 775848b8605Smrg 776848b8605Smrg int i, j, r, src, num_src; 777848b8605Smrg int num_once_inst = 0; 778848b8605Smrg int have_mova = 0, have_rel = 0; 779848b8605Smrg int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 780848b8605Smrg 781848b8605Smrg r = assign_alu_units(bc, alu_prev, prev); 782848b8605Smrg if (r) 783848b8605Smrg return r; 784848b8605Smrg 785848b8605Smrg for (i = 0; i < max_slots; ++i) { 786848b8605Smrg if (prev[i]) { 787848b8605Smrg if (prev[i]->pred_sel) 788848b8605Smrg return 0; 789b8e80941Smrg if (is_alu_once_inst(prev[i])) 790848b8605Smrg return 0; 791848b8605Smrg } 792848b8605Smrg if (slots[i]) { 793848b8605Smrg if (slots[i]->pred_sel) 794848b8605Smrg return 0; 795b8e80941Smrg if (is_alu_once_inst(slots[i])) 796848b8605Smrg return 0; 797848b8605Smrg } 798848b8605Smrg } 799848b8605Smrg 800848b8605Smrg for (i = 0; i < max_slots; ++i) { 801848b8605Smrg struct r600_bytecode_alu *alu; 802848b8605Smrg 803848b8605Smrg if (num_once_inst > 0) 804848b8605Smrg return 0; 805848b8605Smrg 806848b8605Smrg /* check number of literals */ 807848b8605Smrg if (prev[i]) { 808b8e80941Smrg if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral)) 809848b8605Smrg return 0; 810b8e80941Smrg if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral)) 811848b8605Smrg return 0; 812b8e80941Smrg if (is_alu_mova_inst(prev[i])) { 813848b8605Smrg if (have_rel) 814848b8605Smrg return 0; 815848b8605Smrg have_mova = 1; 816848b8605Smrg } 817848b8605Smrg 818b8e80941Smrg if (alu_uses_rel(prev[i])) { 819848b8605Smrg if (have_mova) { 820848b8605Smrg return 0; 821848b8605Smrg } 822848b8605Smrg have_rel = 1; 823848b8605Smrg } 824b8e80941Smrg if (alu_uses_lds(prev[i])) 825b8e80941Smrg return 0; 826848b8605Smrg 827b8e80941Smrg num_once_inst += is_alu_once_inst(prev[i]); 828848b8605Smrg } 829b8e80941Smrg if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral)) 830848b8605Smrg return 0; 831848b8605Smrg 832848b8605Smrg /* Let's check used slots. */ 833848b8605Smrg if (prev[i] && !slots[i]) { 834848b8605Smrg result[i] = prev[i]; 835848b8605Smrg continue; 836848b8605Smrg } else if (prev[i] && slots[i]) { 837848b8605Smrg if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { 838848b8605Smrg /* Trans unit is still free try to use it. */ 839b8e80941Smrg if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) { 840848b8605Smrg result[i] = prev[i]; 841848b8605Smrg result[4] = slots[i]; 842848b8605Smrg } else if (is_alu_any_unit_inst(bc, prev[i])) { 843848b8605Smrg if (slots[i]->dst.sel == prev[i]->dst.sel && 844b8e80941Smrg alu_writes(slots[i]) && 845b8e80941Smrg alu_writes(prev[i])) 846848b8605Smrg return 0; 847848b8605Smrg 848848b8605Smrg result[i] = slots[i]; 849848b8605Smrg result[4] = prev[i]; 850848b8605Smrg } else 851848b8605Smrg return 0; 852848b8605Smrg } else 853848b8605Smrg return 0; 854848b8605Smrg } else if(!slots[i]) { 855848b8605Smrg continue; 856848b8605Smrg } else { 857848b8605Smrg if (max_slots == 5 && slots[i] && prev[4] && 858848b8605Smrg slots[i]->dst.sel == prev[4]->dst.sel && 859848b8605Smrg slots[i]->dst.chan == prev[4]->dst.chan && 860b8e80941Smrg alu_writes(slots[i]) && 861b8e80941Smrg alu_writes(prev[4])) 862848b8605Smrg return 0; 863848b8605Smrg 864848b8605Smrg result[i] = slots[i]; 865848b8605Smrg } 866848b8605Smrg 867848b8605Smrg alu = slots[i]; 868b8e80941Smrg num_once_inst += is_alu_once_inst(alu); 869848b8605Smrg 870848b8605Smrg /* don't reschedule NOPs */ 871b8e80941Smrg if (is_nop_inst(alu)) 872848b8605Smrg return 0; 873848b8605Smrg 874b8e80941Smrg if (is_alu_mova_inst(alu)) { 875848b8605Smrg if (have_rel) { 876848b8605Smrg return 0; 877848b8605Smrg } 878848b8605Smrg have_mova = 1; 879848b8605Smrg } 880848b8605Smrg 881b8e80941Smrg if (alu_uses_rel(alu)) { 882848b8605Smrg if (have_mova) { 883848b8605Smrg return 0; 884848b8605Smrg } 885848b8605Smrg have_rel = 1; 886848b8605Smrg } 887848b8605Smrg 888b8e80941Smrg if (alu->op == ALU_OP0_SET_CF_IDX0 || 889b8e80941Smrg alu->op == ALU_OP0_SET_CF_IDX1) 890b8e80941Smrg return 0; /* data hazard with MOVA */ 891b8e80941Smrg 892848b8605Smrg /* Let's check source gprs */ 893b8e80941Smrg num_src = r600_bytecode_get_num_operands(alu); 894848b8605Smrg for (src = 0; src < num_src; ++src) { 895848b8605Smrg 896848b8605Smrg /* Constants don't matter. */ 897848b8605Smrg if (!is_gpr(alu->src[src].sel)) 898848b8605Smrg continue; 899848b8605Smrg 900848b8605Smrg for (j = 0; j < max_slots; ++j) { 901b8e80941Smrg if (!prev[j] || !alu_writes(prev[j])) 902848b8605Smrg continue; 903848b8605Smrg 904848b8605Smrg /* If it's relative then we can't determin which gpr is really used. */ 905848b8605Smrg if (prev[j]->dst.chan == alu->src[src].chan && 906848b8605Smrg (prev[j]->dst.sel == alu->src[src].sel || 907848b8605Smrg prev[j]->dst.rel || alu->src[src].rel)) 908848b8605Smrg return 0; 909848b8605Smrg } 910848b8605Smrg } 911848b8605Smrg } 912848b8605Smrg 913848b8605Smrg /* more than one PRED_ or KILL_ ? */ 914848b8605Smrg if (num_once_inst > 1) 915848b8605Smrg return 0; 916848b8605Smrg 917848b8605Smrg /* check if the result can still be swizzlet */ 918848b8605Smrg r = check_and_set_bank_swizzle(bc, result); 919848b8605Smrg if (r) 920848b8605Smrg return 0; 921848b8605Smrg 922848b8605Smrg /* looks like everything worked out right, apply the changes */ 923848b8605Smrg 924848b8605Smrg /* undo adding previus literals */ 925848b8605Smrg bc->cf_last->ndw -= align(prev_nliteral, 2); 926848b8605Smrg 927848b8605Smrg /* sort instructions */ 928848b8605Smrg for (i = 0; i < max_slots; ++i) { 929848b8605Smrg slots[i] = result[i]; 930848b8605Smrg if (result[i]) { 931848b8605Smrg LIST_DEL(&result[i]->list); 932848b8605Smrg result[i]->last = 0; 933848b8605Smrg LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu); 934848b8605Smrg } 935848b8605Smrg } 936848b8605Smrg 937848b8605Smrg /* determine new last instruction */ 938848b8605Smrg LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1; 939848b8605Smrg 940848b8605Smrg /* determine new first instruction */ 941848b8605Smrg for (i = 0; i < max_slots; ++i) { 942848b8605Smrg if (result[i]) { 943848b8605Smrg bc->cf_last->curr_bs_head = result[i]; 944848b8605Smrg break; 945848b8605Smrg } 946848b8605Smrg } 947848b8605Smrg 948848b8605Smrg bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head; 949848b8605Smrg bc->cf_last->prev2_bs_head = NULL; 950848b8605Smrg 951848b8605Smrg return 0; 952848b8605Smrg} 953848b8605Smrg 954848b8605Smrg/* we'll keep kcache sets sorted by bank & addr */ 955848b8605Smrgstatic int r600_bytecode_alloc_kcache_line(struct r600_bytecode *bc, 956848b8605Smrg struct r600_bytecode_kcache *kcache, 957b8e80941Smrg unsigned bank, unsigned line, unsigned index_mode) 958848b8605Smrg{ 959848b8605Smrg int i, kcache_banks = bc->chip_class >= EVERGREEN ? 4 : 2; 960848b8605Smrg 961848b8605Smrg for (i = 0; i < kcache_banks; i++) { 962848b8605Smrg if (kcache[i].mode) { 963848b8605Smrg int d; 964848b8605Smrg 965848b8605Smrg if (kcache[i].bank < bank) 966848b8605Smrg continue; 967848b8605Smrg 968848b8605Smrg if ((kcache[i].bank == bank && kcache[i].addr > line+1) || 969848b8605Smrg kcache[i].bank > bank) { 970848b8605Smrg /* try to insert new line */ 971848b8605Smrg if (kcache[kcache_banks-1].mode) { 972848b8605Smrg /* all sets are in use */ 973848b8605Smrg return -ENOMEM; 974848b8605Smrg } 975848b8605Smrg 976848b8605Smrg memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(struct r600_bytecode_kcache)); 977848b8605Smrg kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1; 978848b8605Smrg kcache[i].bank = bank; 979848b8605Smrg kcache[i].addr = line; 980b8e80941Smrg kcache[i].index_mode = index_mode; 981848b8605Smrg return 0; 982848b8605Smrg } 983848b8605Smrg 984848b8605Smrg d = line - kcache[i].addr; 985848b8605Smrg 986848b8605Smrg if (d == -1) { 987848b8605Smrg kcache[i].addr--; 988848b8605Smrg if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_2) { 989848b8605Smrg /* we are prepending the line to the current set, 990848b8605Smrg * discarding the existing second line, 991848b8605Smrg * so we'll have to insert line+2 after it */ 992848b8605Smrg line += 2; 993848b8605Smrg continue; 994848b8605Smrg } else if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_1) { 995848b8605Smrg kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2; 996848b8605Smrg return 0; 997848b8605Smrg } else { 998848b8605Smrg /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */ 999848b8605Smrg return -ENOMEM; 1000848b8605Smrg } 1001848b8605Smrg } else if (d == 1) { 1002848b8605Smrg kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2; 1003848b8605Smrg return 0; 1004848b8605Smrg } else if (d == 0) 1005848b8605Smrg return 0; 1006848b8605Smrg } else { /* free kcache set - use it */ 1007848b8605Smrg kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1; 1008848b8605Smrg kcache[i].bank = bank; 1009848b8605Smrg kcache[i].addr = line; 1010b8e80941Smrg kcache[i].index_mode = index_mode; 1011848b8605Smrg return 0; 1012848b8605Smrg } 1013848b8605Smrg } 1014848b8605Smrg return -ENOMEM; 1015848b8605Smrg} 1016848b8605Smrg 1017848b8605Smrgstatic int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc, 1018848b8605Smrg struct r600_bytecode_kcache *kcache, 1019848b8605Smrg struct r600_bytecode_alu *alu) 1020848b8605Smrg{ 1021848b8605Smrg int i, r; 1022848b8605Smrg 1023848b8605Smrg for (i = 0; i < 3; i++) { 1024b8e80941Smrg unsigned bank, line, sel = alu->src[i].sel, index_mode; 1025848b8605Smrg 1026848b8605Smrg if (sel < 512) 1027848b8605Smrg continue; 1028848b8605Smrg 1029848b8605Smrg bank = alu->src[i].kc_bank; 1030b8e80941Smrg assert(bank < R600_MAX_HW_CONST_BUFFERS); 1031848b8605Smrg line = (sel-512)>>4; 1032b8e80941Smrg index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE 1033848b8605Smrg 1034b8e80941Smrg if ((r = r600_bytecode_alloc_kcache_line(bc, kcache, bank, line, index_mode))) 1035848b8605Smrg return r; 1036848b8605Smrg } 1037848b8605Smrg return 0; 1038848b8605Smrg} 1039848b8605Smrg 1040b8e80941Smrgstatic int r600_bytecode_assign_kcache_banks( 1041848b8605Smrg struct r600_bytecode_alu *alu, 1042848b8605Smrg struct r600_bytecode_kcache * kcache) 1043848b8605Smrg{ 1044848b8605Smrg int i, j; 1045848b8605Smrg 1046848b8605Smrg /* Alter the src operands to refer to the kcache. */ 1047848b8605Smrg for (i = 0; i < 3; ++i) { 1048848b8605Smrg static const unsigned int base[] = {128, 160, 256, 288}; 1049848b8605Smrg unsigned int line, sel = alu->src[i].sel, found = 0; 1050848b8605Smrg 1051848b8605Smrg if (sel < 512) 1052848b8605Smrg continue; 1053848b8605Smrg 1054848b8605Smrg sel -= 512; 1055848b8605Smrg line = sel>>4; 1056848b8605Smrg 1057848b8605Smrg for (j = 0; j < 4 && !found; ++j) { 1058848b8605Smrg switch (kcache[j].mode) { 1059848b8605Smrg case V_SQ_CF_KCACHE_NOP: 1060848b8605Smrg case V_SQ_CF_KCACHE_LOCK_LOOP_INDEX: 1061848b8605Smrg R600_ERR("unexpected kcache line mode\n"); 1062848b8605Smrg return -ENOMEM; 1063848b8605Smrg default: 1064848b8605Smrg if (kcache[j].bank == alu->src[i].kc_bank && 1065848b8605Smrg kcache[j].addr <= line && 1066848b8605Smrg line < kcache[j].addr + kcache[j].mode) { 1067848b8605Smrg alu->src[i].sel = sel - (kcache[j].addr<<4); 1068848b8605Smrg alu->src[i].sel += base[j]; 1069848b8605Smrg found=1; 1070848b8605Smrg } 1071848b8605Smrg } 1072848b8605Smrg } 1073848b8605Smrg } 1074848b8605Smrg return 0; 1075848b8605Smrg} 1076848b8605Smrg 1077848b8605Smrgstatic int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, 1078848b8605Smrg struct r600_bytecode_alu *alu, 1079848b8605Smrg unsigned type) 1080848b8605Smrg{ 1081848b8605Smrg struct r600_bytecode_kcache kcache_sets[4]; 1082848b8605Smrg struct r600_bytecode_kcache *kcache = kcache_sets; 1083848b8605Smrg int r; 1084848b8605Smrg 1085848b8605Smrg memcpy(kcache, bc->cf_last->kcache, 4 * sizeof(struct r600_bytecode_kcache)); 1086848b8605Smrg 1087848b8605Smrg if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) { 1088848b8605Smrg /* can't alloc, need to start new clause */ 1089848b8605Smrg if ((r = r600_bytecode_add_cf(bc))) { 1090848b8605Smrg return r; 1091848b8605Smrg } 1092848b8605Smrg bc->cf_last->op = type; 1093848b8605Smrg 1094848b8605Smrg /* retry with the new clause */ 1095848b8605Smrg kcache = bc->cf_last->kcache; 1096848b8605Smrg if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) { 1097848b8605Smrg /* can't alloc again- should never happen */ 1098848b8605Smrg return r; 1099848b8605Smrg } 1100848b8605Smrg } else { 1101848b8605Smrg /* update kcache sets */ 1102848b8605Smrg memcpy(bc->cf_last->kcache, kcache, 4 * sizeof(struct r600_bytecode_kcache)); 1103848b8605Smrg } 1104848b8605Smrg 1105b8e80941Smrg /* if we actually used more than 2 kcache sets, or have relative indexing - use ALU_EXTENDED on eg+ */ 1106b8e80941Smrg if (kcache[2].mode != V_SQ_CF_KCACHE_NOP || 1107b8e80941Smrg kcache[0].index_mode || kcache[1].index_mode || kcache[2].index_mode || kcache[3].index_mode) { 1108848b8605Smrg if (bc->chip_class < EVERGREEN) 1109848b8605Smrg return -ENOMEM; 1110848b8605Smrg bc->cf_last->eg_alu_extended = 1; 1111848b8605Smrg } 1112848b8605Smrg 1113848b8605Smrg return 0; 1114848b8605Smrg} 1115848b8605Smrg 1116848b8605Smrgstatic int insert_nop_r6xx(struct r600_bytecode *bc) 1117848b8605Smrg{ 1118848b8605Smrg struct r600_bytecode_alu alu; 1119848b8605Smrg int r, i; 1120848b8605Smrg 1121848b8605Smrg for (i = 0; i < 4; i++) { 1122848b8605Smrg memset(&alu, 0, sizeof(alu)); 1123848b8605Smrg alu.op = ALU_OP0_NOP; 1124848b8605Smrg alu.src[0].chan = i; 1125848b8605Smrg alu.dst.chan = i; 1126848b8605Smrg alu.last = (i == 3); 1127848b8605Smrg r = r600_bytecode_add_alu(bc, &alu); 1128848b8605Smrg if (r) 1129848b8605Smrg return r; 1130848b8605Smrg } 1131848b8605Smrg return 0; 1132848b8605Smrg} 1133848b8605Smrg 1134848b8605Smrg/* load AR register from gpr (bc->ar_reg) with MOVA_INT */ 1135848b8605Smrgstatic int load_ar_r6xx(struct r600_bytecode *bc) 1136848b8605Smrg{ 1137848b8605Smrg struct r600_bytecode_alu alu; 1138848b8605Smrg int r; 1139848b8605Smrg 1140848b8605Smrg if (bc->ar_loaded) 1141848b8605Smrg return 0; 1142848b8605Smrg 1143848b8605Smrg /* hack to avoid making MOVA the last instruction in the clause */ 1144848b8605Smrg if ((bc->cf_last->ndw>>1) >= 110) 1145848b8605Smrg bc->force_add_cf = 1; 1146848b8605Smrg 1147848b8605Smrg memset(&alu, 0, sizeof(alu)); 1148848b8605Smrg alu.op = ALU_OP1_MOVA_GPR_INT; 1149848b8605Smrg alu.src[0].sel = bc->ar_reg; 1150848b8605Smrg alu.src[0].chan = bc->ar_chan; 1151848b8605Smrg alu.last = 1; 1152848b8605Smrg alu.index_mode = INDEX_MODE_LOOP; 1153848b8605Smrg r = r600_bytecode_add_alu(bc, &alu); 1154848b8605Smrg if (r) 1155848b8605Smrg return r; 1156848b8605Smrg 1157848b8605Smrg /* no requirement to set uses waterfall on MOVA_GPR_INT */ 1158848b8605Smrg bc->ar_loaded = 1; 1159848b8605Smrg return 0; 1160848b8605Smrg} 1161848b8605Smrg 1162848b8605Smrg/* load AR register from gpr (bc->ar_reg) with MOVA_INT */ 1163848b8605Smrgstatic int load_ar(struct r600_bytecode *bc) 1164848b8605Smrg{ 1165848b8605Smrg struct r600_bytecode_alu alu; 1166848b8605Smrg int r; 1167848b8605Smrg 1168848b8605Smrg if (bc->ar_handling) 1169848b8605Smrg return load_ar_r6xx(bc); 1170848b8605Smrg 1171848b8605Smrg if (bc->ar_loaded) 1172848b8605Smrg return 0; 1173848b8605Smrg 1174848b8605Smrg /* hack to avoid making MOVA the last instruction in the clause */ 1175848b8605Smrg if ((bc->cf_last->ndw>>1) >= 110) 1176848b8605Smrg bc->force_add_cf = 1; 1177848b8605Smrg 1178848b8605Smrg memset(&alu, 0, sizeof(alu)); 1179848b8605Smrg alu.op = ALU_OP1_MOVA_INT; 1180848b8605Smrg alu.src[0].sel = bc->ar_reg; 1181848b8605Smrg alu.src[0].chan = bc->ar_chan; 1182848b8605Smrg alu.last = 1; 1183848b8605Smrg r = r600_bytecode_add_alu(bc, &alu); 1184848b8605Smrg if (r) 1185848b8605Smrg return r; 1186848b8605Smrg 1187848b8605Smrg bc->cf_last->r6xx_uses_waterfall = 1; 1188848b8605Smrg bc->ar_loaded = 1; 1189848b8605Smrg return 0; 1190848b8605Smrg} 1191848b8605Smrg 1192848b8605Smrgint r600_bytecode_add_alu_type(struct r600_bytecode *bc, 1193848b8605Smrg const struct r600_bytecode_alu *alu, unsigned type) 1194848b8605Smrg{ 1195848b8605Smrg struct r600_bytecode_alu *nalu = r600_bytecode_alu(); 1196848b8605Smrg struct r600_bytecode_alu *lalu; 1197848b8605Smrg int i, r; 1198848b8605Smrg 1199b8e80941Smrg if (!nalu) 1200848b8605Smrg return -ENOMEM; 1201848b8605Smrg memcpy(nalu, alu, sizeof(struct r600_bytecode_alu)); 1202848b8605Smrg 1203b8e80941Smrg if (alu->is_op3) { 1204b8e80941Smrg /* will fail later since alu does not support it. */ 1205b8e80941Smrg assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); 1206b8e80941Smrg } 1207b8e80941Smrg 1208848b8605Smrg if (bc->cf_last != NULL && bc->cf_last->op != type) { 1209848b8605Smrg /* check if we could add it anyway */ 1210848b8605Smrg if (bc->cf_last->op == CF_OP_ALU && 1211848b8605Smrg type == CF_OP_ALU_PUSH_BEFORE) { 1212848b8605Smrg LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) { 1213848b8605Smrg if (lalu->execute_mask) { 1214848b8605Smrg bc->force_add_cf = 1; 1215848b8605Smrg break; 1216848b8605Smrg } 1217848b8605Smrg } 1218848b8605Smrg } else 1219848b8605Smrg bc->force_add_cf = 1; 1220848b8605Smrg } 1221848b8605Smrg 1222848b8605Smrg /* cf can contains only alu or only vtx or only tex */ 1223848b8605Smrg if (bc->cf_last == NULL || bc->force_add_cf) { 1224848b8605Smrg r = r600_bytecode_add_cf(bc); 1225848b8605Smrg if (r) { 1226848b8605Smrg free(nalu); 1227848b8605Smrg return r; 1228848b8605Smrg } 1229848b8605Smrg } 1230848b8605Smrg bc->cf_last->op = type; 1231848b8605Smrg 1232b8e80941Smrg /* Load index register if required */ 1233b8e80941Smrg if (bc->chip_class >= EVERGREEN) { 1234b8e80941Smrg for (i = 0; i < 3; i++) 1235b8e80941Smrg if (nalu->src[i].kc_bank && nalu->src[i].kc_rel) 1236b8e80941Smrg egcm_load_index_reg(bc, 0, true); 1237b8e80941Smrg } 1238b8e80941Smrg 1239848b8605Smrg /* Check AR usage and load it if required */ 1240848b8605Smrg for (i = 0; i < 3; i++) 1241848b8605Smrg if (nalu->src[i].rel && !bc->ar_loaded) 1242848b8605Smrg load_ar(bc); 1243848b8605Smrg 1244848b8605Smrg if (nalu->dst.rel && !bc->ar_loaded) 1245848b8605Smrg load_ar(bc); 1246848b8605Smrg 1247848b8605Smrg /* Setup the kcache for this ALU instruction. This will start a new 1248848b8605Smrg * ALU clause if needed. */ 1249848b8605Smrg if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) { 1250848b8605Smrg free(nalu); 1251848b8605Smrg return r; 1252848b8605Smrg } 1253848b8605Smrg 1254848b8605Smrg if (!bc->cf_last->curr_bs_head) { 1255848b8605Smrg bc->cf_last->curr_bs_head = nalu; 1256848b8605Smrg } 1257848b8605Smrg /* number of gpr == the last gpr used in any alu */ 1258848b8605Smrg for (i = 0; i < 3; i++) { 1259848b8605Smrg if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { 1260848b8605Smrg bc->ngpr = nalu->src[i].sel + 1; 1261848b8605Smrg } 1262848b8605Smrg if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) 1263848b8605Smrg r600_bytecode_special_constants(nalu->src[i].value, 1264b8e80941Smrg &nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs); 1265848b8605Smrg } 1266848b8605Smrg if (nalu->dst.sel >= bc->ngpr) { 1267848b8605Smrg bc->ngpr = nalu->dst.sel + 1; 1268848b8605Smrg } 1269848b8605Smrg LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); 1270848b8605Smrg /* each alu use 2 dwords */ 1271848b8605Smrg bc->cf_last->ndw += 2; 1272848b8605Smrg bc->ndw += 2; 1273848b8605Smrg 1274848b8605Smrg /* process cur ALU instructions for bank swizzle */ 1275848b8605Smrg if (nalu->last) { 1276848b8605Smrg uint32_t literal[4]; 1277848b8605Smrg unsigned nliteral; 1278848b8605Smrg struct r600_bytecode_alu *slots[5]; 1279848b8605Smrg int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 1280848b8605Smrg r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); 1281848b8605Smrg if (r) 1282848b8605Smrg return r; 1283848b8605Smrg 1284848b8605Smrg if (bc->cf_last->prev_bs_head) { 1285848b8605Smrg r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head); 1286848b8605Smrg if (r) 1287848b8605Smrg return r; 1288848b8605Smrg } 1289848b8605Smrg 1290848b8605Smrg if (bc->cf_last->prev_bs_head) { 1291848b8605Smrg r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head); 1292848b8605Smrg if (r) 1293848b8605Smrg return r; 1294848b8605Smrg } 1295848b8605Smrg 1296848b8605Smrg r = check_and_set_bank_swizzle(bc, slots); 1297848b8605Smrg if (r) 1298848b8605Smrg return r; 1299848b8605Smrg 1300848b8605Smrg for (i = 0, nliteral = 0; i < max_slots; i++) { 1301848b8605Smrg if (slots[i]) { 1302b8e80941Smrg r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral); 1303848b8605Smrg if (r) 1304848b8605Smrg return r; 1305848b8605Smrg } 1306848b8605Smrg } 1307848b8605Smrg bc->cf_last->ndw += align(nliteral, 2); 1308848b8605Smrg 1309848b8605Smrg /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots) 1310848b8605Smrg * worst case */ 1311848b8605Smrg if ((bc->cf_last->ndw >> 1) >= 120) { 1312848b8605Smrg bc->force_add_cf = 1; 1313848b8605Smrg } 1314848b8605Smrg 1315848b8605Smrg bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head; 1316848b8605Smrg bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head; 1317848b8605Smrg bc->cf_last->curr_bs_head = NULL; 1318848b8605Smrg } 1319848b8605Smrg 1320848b8605Smrg if (nalu->dst.rel && bc->r6xx_nop_after_rel_dst) 1321848b8605Smrg insert_nop_r6xx(bc); 1322848b8605Smrg 1323b8e80941Smrg /* Might need to insert spill write ops after current clause */ 1324b8e80941Smrg if (nalu->last && bc->n_pending_outputs) { 1325b8e80941Smrg while (bc->n_pending_outputs) { 1326b8e80941Smrg r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]); 1327b8e80941Smrg if (r) 1328b8e80941Smrg return r; 1329b8e80941Smrg } 1330b8e80941Smrg } 1331b8e80941Smrg 1332848b8605Smrg return 0; 1333848b8605Smrg} 1334848b8605Smrg 1335848b8605Smrgint r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu) 1336848b8605Smrg{ 1337848b8605Smrg return r600_bytecode_add_alu_type(bc, alu, CF_OP_ALU); 1338848b8605Smrg} 1339848b8605Smrg 1340848b8605Smrgstatic unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc) 1341848b8605Smrg{ 1342848b8605Smrg switch (bc->chip_class) { 1343848b8605Smrg case R600: 1344848b8605Smrg return 8; 1345848b8605Smrg 1346848b8605Smrg case R700: 1347848b8605Smrg case EVERGREEN: 1348848b8605Smrg case CAYMAN: 1349848b8605Smrg return 16; 1350848b8605Smrg 1351848b8605Smrg default: 1352848b8605Smrg R600_ERR("Unknown chip class %d.\n", bc->chip_class); 1353848b8605Smrg return 8; 1354848b8605Smrg } 1355848b8605Smrg} 1356848b8605Smrg 1357848b8605Smrgstatic inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc) 1358848b8605Smrg{ 1359848b8605Smrg return !((r600_isa_cf(bc->cf_last->op)->flags & CF_FETCH) && 1360b8e80941Smrg bc->cf_last->op != CF_OP_GDS && 1361b8e80941Smrg (bc->chip_class == CAYMAN || 1362b8e80941Smrg bc->cf_last->op != CF_OP_TEX)); 1363848b8605Smrg} 1364848b8605Smrg 1365b8e80941Smrgstatic int r600_bytecode_add_vtx_internal(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx, 1366b8e80941Smrg bool use_tc) 1367848b8605Smrg{ 1368848b8605Smrg struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx(); 1369848b8605Smrg int r; 1370848b8605Smrg 1371b8e80941Smrg if (!nvtx) 1372848b8605Smrg return -ENOMEM; 1373848b8605Smrg memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx)); 1374848b8605Smrg 1375b8e80941Smrg /* Load index register if required */ 1376b8e80941Smrg if (bc->chip_class >= EVERGREEN) { 1377b8e80941Smrg if (vtx->buffer_index_mode) 1378b8e80941Smrg egcm_load_index_reg(bc, vtx->buffer_index_mode - 1, false); 1379b8e80941Smrg } 1380b8e80941Smrg 1381848b8605Smrg /* cf can contains only alu or only vtx or only tex */ 1382848b8605Smrg if (bc->cf_last == NULL || 1383848b8605Smrg last_inst_was_not_vtx_fetch(bc) || 1384848b8605Smrg bc->force_add_cf) { 1385848b8605Smrg r = r600_bytecode_add_cf(bc); 1386848b8605Smrg if (r) { 1387848b8605Smrg free(nvtx); 1388848b8605Smrg return r; 1389848b8605Smrg } 1390848b8605Smrg switch (bc->chip_class) { 1391848b8605Smrg case R600: 1392848b8605Smrg case R700: 1393848b8605Smrg bc->cf_last->op = CF_OP_VTX; 1394848b8605Smrg break; 1395b8e80941Smrg case EVERGREEN: 1396b8e80941Smrg if (use_tc) 1397b8e80941Smrg bc->cf_last->op = CF_OP_TEX; 1398b8e80941Smrg else 1399b8e80941Smrg bc->cf_last->op = CF_OP_VTX; 1400b8e80941Smrg break; 1401848b8605Smrg case CAYMAN: 1402848b8605Smrg bc->cf_last->op = CF_OP_TEX; 1403848b8605Smrg break; 1404848b8605Smrg default: 1405848b8605Smrg R600_ERR("Unknown chip class %d.\n", bc->chip_class); 1406848b8605Smrg free(nvtx); 1407848b8605Smrg return -EINVAL; 1408848b8605Smrg } 1409848b8605Smrg } 1410848b8605Smrg LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx); 1411848b8605Smrg /* each fetch use 4 dwords */ 1412848b8605Smrg bc->cf_last->ndw += 4; 1413848b8605Smrg bc->ndw += 4; 1414848b8605Smrg if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 1415848b8605Smrg bc->force_add_cf = 1; 1416848b8605Smrg 1417848b8605Smrg bc->ngpr = MAX2(bc->ngpr, vtx->src_gpr + 1); 1418848b8605Smrg bc->ngpr = MAX2(bc->ngpr, vtx->dst_gpr + 1); 1419848b8605Smrg 1420848b8605Smrg return 0; 1421848b8605Smrg} 1422848b8605Smrg 1423b8e80941Smrgint r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) 1424b8e80941Smrg{ 1425b8e80941Smrg return r600_bytecode_add_vtx_internal(bc, vtx, false); 1426b8e80941Smrg} 1427b8e80941Smrg 1428b8e80941Smrgint r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) 1429b8e80941Smrg{ 1430b8e80941Smrg return r600_bytecode_add_vtx_internal(bc, vtx, true); 1431b8e80941Smrg} 1432b8e80941Smrg 1433848b8605Smrgint r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex) 1434848b8605Smrg{ 1435848b8605Smrg struct r600_bytecode_tex *ntex = r600_bytecode_tex(); 1436848b8605Smrg int r; 1437848b8605Smrg 1438b8e80941Smrg if (!ntex) 1439848b8605Smrg return -ENOMEM; 1440848b8605Smrg memcpy(ntex, tex, sizeof(struct r600_bytecode_tex)); 1441848b8605Smrg 1442b8e80941Smrg /* Load index register if required */ 1443b8e80941Smrg if (bc->chip_class >= EVERGREEN) { 1444b8e80941Smrg if (tex->sampler_index_mode || tex->resource_index_mode) 1445b8e80941Smrg egcm_load_index_reg(bc, 1, false); 1446b8e80941Smrg } 1447b8e80941Smrg 1448848b8605Smrg /* we can't fetch data und use it as texture lookup address in the same TEX clause */ 1449848b8605Smrg if (bc->cf_last != NULL && 1450848b8605Smrg bc->cf_last->op == CF_OP_TEX) { 1451848b8605Smrg struct r600_bytecode_tex *ttex; 1452848b8605Smrg LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { 1453848b8605Smrg if (ttex->dst_gpr == ntex->src_gpr) { 1454848b8605Smrg bc->force_add_cf = 1; 1455848b8605Smrg break; 1456848b8605Smrg } 1457848b8605Smrg } 1458848b8605Smrg /* slight hack to make gradients always go into same cf */ 1459848b8605Smrg if (ntex->op == FETCH_OP_SET_GRADIENTS_H) 1460848b8605Smrg bc->force_add_cf = 1; 1461848b8605Smrg } 1462848b8605Smrg 1463848b8605Smrg /* cf can contains only alu or only vtx or only tex */ 1464848b8605Smrg if (bc->cf_last == NULL || 1465848b8605Smrg bc->cf_last->op != CF_OP_TEX || 1466848b8605Smrg bc->force_add_cf) { 1467848b8605Smrg r = r600_bytecode_add_cf(bc); 1468848b8605Smrg if (r) { 1469848b8605Smrg free(ntex); 1470848b8605Smrg return r; 1471848b8605Smrg } 1472848b8605Smrg bc->cf_last->op = CF_OP_TEX; 1473848b8605Smrg } 1474848b8605Smrg if (ntex->src_gpr >= bc->ngpr) { 1475848b8605Smrg bc->ngpr = ntex->src_gpr + 1; 1476848b8605Smrg } 1477848b8605Smrg if (ntex->dst_gpr >= bc->ngpr) { 1478848b8605Smrg bc->ngpr = ntex->dst_gpr + 1; 1479848b8605Smrg } 1480848b8605Smrg LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex); 1481848b8605Smrg /* each texture fetch use 4 dwords */ 1482848b8605Smrg bc->cf_last->ndw += 4; 1483848b8605Smrg bc->ndw += 4; 1484848b8605Smrg if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 1485848b8605Smrg bc->force_add_cf = 1; 1486848b8605Smrg return 0; 1487848b8605Smrg} 1488848b8605Smrg 1489b8e80941Smrgint r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds) 1490b8e80941Smrg{ 1491b8e80941Smrg struct r600_bytecode_gds *ngds = r600_bytecode_gds(); 1492b8e80941Smrg int r; 1493b8e80941Smrg 1494b8e80941Smrg if (ngds == NULL) 1495b8e80941Smrg return -ENOMEM; 1496b8e80941Smrg memcpy(ngds, gds, sizeof(struct r600_bytecode_gds)); 1497b8e80941Smrg 1498b8e80941Smrg if (bc->chip_class >= EVERGREEN) { 1499b8e80941Smrg if (gds->uav_index_mode) 1500b8e80941Smrg egcm_load_index_reg(bc, gds->uav_index_mode - 1, false); 1501b8e80941Smrg } 1502b8e80941Smrg 1503b8e80941Smrg if (bc->cf_last == NULL || 1504b8e80941Smrg bc->cf_last->op != CF_OP_GDS || 1505b8e80941Smrg bc->force_add_cf) { 1506b8e80941Smrg r = r600_bytecode_add_cf(bc); 1507b8e80941Smrg if (r) { 1508b8e80941Smrg free(ngds); 1509b8e80941Smrg return r; 1510b8e80941Smrg } 1511b8e80941Smrg bc->cf_last->op = CF_OP_GDS; 1512b8e80941Smrg } 1513b8e80941Smrg 1514b8e80941Smrg LIST_ADDTAIL(&ngds->list, &bc->cf_last->gds); 1515b8e80941Smrg bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */ 1516b8e80941Smrg if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 1517b8e80941Smrg bc->force_add_cf = 1; 1518b8e80941Smrg return 0; 1519b8e80941Smrg} 1520b8e80941Smrg 1521848b8605Smrgint r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op) 1522848b8605Smrg{ 1523848b8605Smrg int r; 1524b8e80941Smrg 1525b8e80941Smrg /* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */ 1526b8e80941Smrg if (op != CF_OP_MEM_SCRATCH && bc->need_wait_ack) { 1527b8e80941Smrg bc->need_wait_ack = false; 1528b8e80941Smrg r = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK); 1529b8e80941Smrg } 1530b8e80941Smrg 1531848b8605Smrg r = r600_bytecode_add_cf(bc); 1532848b8605Smrg if (r) 1533848b8605Smrg return r; 1534848b8605Smrg 1535848b8605Smrg bc->cf_last->cond = V_SQ_CF_COND_ACTIVE; 1536848b8605Smrg bc->cf_last->op = op; 1537848b8605Smrg return 0; 1538848b8605Smrg} 1539848b8605Smrg 1540848b8605Smrgint cm_bytecode_add_cf_end(struct r600_bytecode *bc) 1541848b8605Smrg{ 1542848b8605Smrg return r600_bytecode_add_cfinst(bc, CF_OP_CF_END); 1543848b8605Smrg} 1544848b8605Smrg 1545848b8605Smrg/* common to all 3 families */ 1546848b8605Smrgstatic int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id) 1547848b8605Smrg{ 1548b8e80941Smrg if (r600_isa_fetch(vtx->op)->flags & FF_MEM) 1549b8e80941Smrg return r700_bytecode_fetch_mem_build(bc, vtx, id); 1550b8e80941Smrg bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) | 1551b8e80941Smrg S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | 1552848b8605Smrg S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | 1553848b8605Smrg S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | 1554848b8605Smrg S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); 1555848b8605Smrg if (bc->chip_class < CAYMAN) 1556848b8605Smrg bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); 1557848b8605Smrg id++; 1558848b8605Smrg bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | 1559848b8605Smrg S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | 1560848b8605Smrg S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | 1561848b8605Smrg S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | 1562848b8605Smrg S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) | 1563848b8605Smrg S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) | 1564848b8605Smrg S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) | 1565848b8605Smrg S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | 1566848b8605Smrg S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | 1567848b8605Smrg S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); 1568848b8605Smrg bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| 1569848b8605Smrg S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); 1570b8e80941Smrg if (bc->chip_class >= EVERGREEN) 1571b8e80941Smrg bc->bytecode[id] |= ((vtx->buffer_index_mode & 0x3) << 21); // S_SQ_VTX_WORD2_BIM(vtx->buffer_index_mode); 1572848b8605Smrg if (bc->chip_class < CAYMAN) 1573848b8605Smrg bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); 1574848b8605Smrg id++; 1575848b8605Smrg bc->bytecode[id++] = 0; 1576848b8605Smrg return 0; 1577848b8605Smrg} 1578848b8605Smrg 1579848b8605Smrg/* common to all 3 families */ 1580848b8605Smrgstatic int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) 1581848b8605Smrg{ 1582b8e80941Smrg bc->bytecode[id] = S_SQ_TEX_WORD0_TEX_INST( 1583848b8605Smrg r600_isa_fetch_opcode(bc->isa->hw_class, tex->op)) | 1584848b8605Smrg EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) | 1585848b8605Smrg S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | 1586848b8605Smrg S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | 1587848b8605Smrg S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); 1588b8e80941Smrg if (bc->chip_class >= EVERGREEN) 1589b8e80941Smrg bc->bytecode[id] |= ((tex->sampler_index_mode & 0x3) << 27) | // S_SQ_TEX_WORD0_SIM(tex->sampler_index_mode); 1590b8e80941Smrg ((tex->resource_index_mode & 0x3) << 25); // S_SQ_TEX_WORD0_RIM(tex->resource_index_mode) 1591b8e80941Smrg id++; 1592848b8605Smrg bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) | 1593848b8605Smrg S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) | 1594848b8605Smrg S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) | 1595848b8605Smrg S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) | 1596848b8605Smrg S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) | 1597848b8605Smrg S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) | 1598848b8605Smrg S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) | 1599848b8605Smrg S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) | 1600848b8605Smrg S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) | 1601848b8605Smrg S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) | 1602848b8605Smrg S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w); 1603848b8605Smrg bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) | 1604848b8605Smrg S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) | 1605848b8605Smrg S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) | 1606848b8605Smrg S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) | 1607848b8605Smrg S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) | 1608848b8605Smrg S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) | 1609848b8605Smrg S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) | 1610848b8605Smrg S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w); 1611848b8605Smrg bc->bytecode[id++] = 0; 1612848b8605Smrg return 0; 1613848b8605Smrg} 1614848b8605Smrg 1615848b8605Smrg/* r600 only, r700/eg bits in r700_asm.c */ 1616848b8605Smrgstatic int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) 1617848b8605Smrg{ 1618848b8605Smrg unsigned opcode = r600_isa_alu_opcode(bc->isa->hw_class, alu->op); 1619848b8605Smrg 1620848b8605Smrg /* don't replace gpr by pv or ps for destination register */ 1621848b8605Smrg bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | 1622848b8605Smrg S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | 1623848b8605Smrg S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | 1624848b8605Smrg S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | 1625848b8605Smrg S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | 1626848b8605Smrg S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) | 1627848b8605Smrg S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | 1628848b8605Smrg S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | 1629848b8605Smrg S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) | 1630848b8605Smrg S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) | 1631848b8605Smrg S_SQ_ALU_WORD0_LAST(alu->last); 1632848b8605Smrg 1633848b8605Smrg if (alu->is_op3) { 1634b8e80941Smrg assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); 1635848b8605Smrg bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | 1636848b8605Smrg S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | 1637848b8605Smrg S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | 1638848b8605Smrg S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | 1639848b8605Smrg S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | 1640848b8605Smrg S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) | 1641848b8605Smrg S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | 1642848b8605Smrg S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | 1643848b8605Smrg S_SQ_ALU_WORD1_OP3_ALU_INST(opcode) | 1644848b8605Smrg S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle); 1645848b8605Smrg } else { 1646848b8605Smrg bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | 1647848b8605Smrg S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | 1648848b8605Smrg S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | 1649848b8605Smrg S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | 1650848b8605Smrg S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | 1651848b8605Smrg S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | 1652848b8605Smrg S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | 1653848b8605Smrg S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | 1654848b8605Smrg S_SQ_ALU_WORD1_OP2_ALU_INST(opcode) | 1655848b8605Smrg S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | 1656848b8605Smrg S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) | 1657848b8605Smrg S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred); 1658848b8605Smrg } 1659848b8605Smrg return 0; 1660848b8605Smrg} 1661848b8605Smrg 1662848b8605Smrgstatic void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) 1663848b8605Smrg{ 1664848b8605Smrg *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); 1665848b8605Smrg *bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) | 1666848b8605Smrg S_SQ_CF_WORD1_BARRIER(1) | 1667b8e80941Smrg S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)| 1668b8e80941Smrg S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); 1669848b8605Smrg} 1670848b8605Smrg 1671848b8605Smrg/* common for r600/r700 - eg in eg_asm.c */ 1672848b8605Smrgstatic int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) 1673848b8605Smrg{ 1674848b8605Smrg unsigned id = cf->id; 1675848b8605Smrg const struct cf_op_info *cfop = r600_isa_cf(cf->op); 1676848b8605Smrg unsigned opcode = r600_isa_cf_opcode(bc->isa->hw_class, cf->op); 1677848b8605Smrg 1678848b8605Smrg 1679848b8605Smrg if (cf->op == CF_NATIVE) { 1680848b8605Smrg bc->bytecode[id++] = cf->isa[0]; 1681848b8605Smrg bc->bytecode[id++] = cf->isa[1]; 1682848b8605Smrg } else if (cfop->flags & CF_ALU) { 1683848b8605Smrg bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | 1684848b8605Smrg S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | 1685848b8605Smrg S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | 1686848b8605Smrg S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); 1687848b8605Smrg 1688848b8605Smrg bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(opcode) | 1689848b8605Smrg S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | 1690848b8605Smrg S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | 1691848b8605Smrg S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | 1692848b8605Smrg S_SQ_CF_ALU_WORD1_BARRIER(1) | 1693848b8605Smrg S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chip_class == R600 ? cf->r6xx_uses_waterfall : 0) | 1694848b8605Smrg S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); 1695848b8605Smrg } else if (cfop->flags & CF_FETCH) { 1696848b8605Smrg if (bc->chip_class == R700) 1697848b8605Smrg r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf); 1698848b8605Smrg else 1699848b8605Smrg r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf); 1700848b8605Smrg } else if (cfop->flags & CF_EXP) { 1701848b8605Smrg bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | 1702848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | 1703848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | 1704848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) | 1705848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr); 1706848b8605Smrg bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | 1707848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | 1708848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | 1709848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | 1710848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | 1711848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) | 1712848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) | 1713848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); 1714848b8605Smrg } else if (cfop->flags & CF_MEM) { 1715848b8605Smrg bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | 1716848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | 1717848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | 1718848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) | 1719848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr); 1720848b8605Smrg bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | 1721848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) | 1722848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) | 1723848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) | 1724848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) | 1725848b8605Smrg S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask); 1726848b8605Smrg } else { 1727848b8605Smrg bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); 1728848b8605Smrg bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) | 1729848b8605Smrg S_SQ_CF_WORD1_BARRIER(1) | 1730848b8605Smrg S_SQ_CF_WORD1_COND(cf->cond) | 1731848b8605Smrg S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) | 1732848b8605Smrg S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); 1733848b8605Smrg } 1734848b8605Smrg return 0; 1735848b8605Smrg} 1736848b8605Smrg 1737848b8605Smrgint r600_bytecode_build(struct r600_bytecode *bc) 1738848b8605Smrg{ 1739848b8605Smrg struct r600_bytecode_cf *cf; 1740848b8605Smrg struct r600_bytecode_alu *alu; 1741848b8605Smrg struct r600_bytecode_vtx *vtx; 1742848b8605Smrg struct r600_bytecode_tex *tex; 1743b8e80941Smrg struct r600_bytecode_gds *gds; 1744848b8605Smrg uint32_t literal[4]; 1745848b8605Smrg unsigned nliteral; 1746848b8605Smrg unsigned addr; 1747848b8605Smrg int i, r; 1748848b8605Smrg 1749b8e80941Smrg if (!bc->nstack) { // If not 0, Stack_size already provided by llvm 1750b8e80941Smrg if (bc->stack.max_entries) 1751b8e80941Smrg bc->nstack = bc->stack.max_entries; 1752b8e80941Smrg else if (bc->type == PIPE_SHADER_VERTEX || 1753b8e80941Smrg bc->type == PIPE_SHADER_TESS_EVAL || 1754b8e80941Smrg bc->type == PIPE_SHADER_TESS_CTRL) 1755b8e80941Smrg bc->nstack = 1; 1756848b8605Smrg } 1757848b8605Smrg 1758848b8605Smrg /* first path compute addr of each CF block */ 1759848b8605Smrg /* addr start after all the CF instructions */ 1760848b8605Smrg addr = bc->cf_last->id + 2; 1761848b8605Smrg LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 1762848b8605Smrg if (r600_isa_cf(cf->op)->flags & CF_FETCH) { 1763848b8605Smrg addr += 3; 1764848b8605Smrg addr &= 0xFFFFFFFCUL; 1765848b8605Smrg } 1766848b8605Smrg cf->addr = addr; 1767848b8605Smrg addr += cf->ndw; 1768848b8605Smrg bc->ndw = cf->addr + cf->ndw; 1769848b8605Smrg } 1770848b8605Smrg free(bc->bytecode); 1771b8e80941Smrg bc->bytecode = calloc(4, bc->ndw); 1772848b8605Smrg if (bc->bytecode == NULL) 1773848b8605Smrg return -ENOMEM; 1774848b8605Smrg LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 1775848b8605Smrg const struct cf_op_info *cfop = r600_isa_cf(cf->op); 1776848b8605Smrg addr = cf->addr; 1777848b8605Smrg if (bc->chip_class >= EVERGREEN) 1778848b8605Smrg r = eg_bytecode_cf_build(bc, cf); 1779848b8605Smrg else 1780848b8605Smrg r = r600_bytecode_cf_build(bc, cf); 1781848b8605Smrg if (r) 1782848b8605Smrg return r; 1783848b8605Smrg if (cfop->flags & CF_ALU) { 1784848b8605Smrg nliteral = 0; 1785848b8605Smrg memset(literal, 0, sizeof(literal)); 1786848b8605Smrg LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { 1787b8e80941Smrg r = r600_bytecode_alu_nliterals(alu, literal, &nliteral); 1788848b8605Smrg if (r) 1789848b8605Smrg return r; 1790b8e80941Smrg r600_bytecode_alu_adjust_literals(alu, literal, nliteral); 1791b8e80941Smrg r600_bytecode_assign_kcache_banks(alu, cf->kcache); 1792848b8605Smrg 1793848b8605Smrg switch(bc->chip_class) { 1794848b8605Smrg case R600: 1795848b8605Smrg r = r600_bytecode_alu_build(bc, alu, addr); 1796848b8605Smrg break; 1797848b8605Smrg case R700: 1798848b8605Smrg r = r700_bytecode_alu_build(bc, alu, addr); 1799848b8605Smrg break; 1800b8e80941Smrg case EVERGREEN: 1801b8e80941Smrg case CAYMAN: 1802b8e80941Smrg r = eg_bytecode_alu_build(bc, alu, addr); 1803b8e80941Smrg break; 1804848b8605Smrg default: 1805848b8605Smrg R600_ERR("unknown chip class %d.\n", bc->chip_class); 1806848b8605Smrg return -EINVAL; 1807848b8605Smrg } 1808848b8605Smrg if (r) 1809848b8605Smrg return r; 1810848b8605Smrg addr += 2; 1811848b8605Smrg if (alu->last) { 1812848b8605Smrg for (i = 0; i < align(nliteral, 2); ++i) { 1813848b8605Smrg bc->bytecode[addr++] = literal[i]; 1814848b8605Smrg } 1815848b8605Smrg nliteral = 0; 1816848b8605Smrg memset(literal, 0, sizeof(literal)); 1817848b8605Smrg } 1818848b8605Smrg } 1819848b8605Smrg } else if (cf->op == CF_OP_VTX) { 1820848b8605Smrg LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 1821848b8605Smrg r = r600_bytecode_vtx_build(bc, vtx, addr); 1822848b8605Smrg if (r) 1823848b8605Smrg return r; 1824848b8605Smrg addr += 4; 1825848b8605Smrg } 1826b8e80941Smrg } else if (cf->op == CF_OP_GDS) { 1827b8e80941Smrg assert(bc->chip_class >= EVERGREEN); 1828b8e80941Smrg LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) { 1829b8e80941Smrg r = eg_bytecode_gds_build(bc, gds, addr); 1830b8e80941Smrg if (r) 1831b8e80941Smrg return r; 1832b8e80941Smrg addr += 4; 1833b8e80941Smrg } 1834848b8605Smrg } else if (cf->op == CF_OP_TEX) { 1835848b8605Smrg LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 1836848b8605Smrg assert(bc->chip_class >= EVERGREEN); 1837848b8605Smrg r = r600_bytecode_vtx_build(bc, vtx, addr); 1838848b8605Smrg if (r) 1839848b8605Smrg return r; 1840848b8605Smrg addr += 4; 1841848b8605Smrg } 1842848b8605Smrg LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { 1843848b8605Smrg r = r600_bytecode_tex_build(bc, tex, addr); 1844848b8605Smrg if (r) 1845848b8605Smrg return r; 1846848b8605Smrg addr += 4; 1847848b8605Smrg } 1848848b8605Smrg } 1849848b8605Smrg } 1850848b8605Smrg return 0; 1851848b8605Smrg} 1852848b8605Smrg 1853848b8605Smrgvoid r600_bytecode_clear(struct r600_bytecode *bc) 1854848b8605Smrg{ 1855848b8605Smrg struct r600_bytecode_cf *cf = NULL, *next_cf; 1856848b8605Smrg 1857848b8605Smrg free(bc->bytecode); 1858848b8605Smrg bc->bytecode = NULL; 1859848b8605Smrg 1860848b8605Smrg LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { 1861848b8605Smrg struct r600_bytecode_alu *alu = NULL, *next_alu; 1862848b8605Smrg struct r600_bytecode_tex *tex = NULL, *next_tex; 1863848b8605Smrg struct r600_bytecode_tex *vtx = NULL, *next_vtx; 1864b8e80941Smrg struct r600_bytecode_gds *gds = NULL, *next_gds; 1865848b8605Smrg 1866848b8605Smrg LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { 1867848b8605Smrg free(alu); 1868848b8605Smrg } 1869848b8605Smrg 1870848b8605Smrg LIST_INITHEAD(&cf->alu); 1871848b8605Smrg 1872848b8605Smrg LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) { 1873848b8605Smrg free(tex); 1874848b8605Smrg } 1875848b8605Smrg 1876848b8605Smrg LIST_INITHEAD(&cf->tex); 1877848b8605Smrg 1878848b8605Smrg LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) { 1879848b8605Smrg free(vtx); 1880848b8605Smrg } 1881848b8605Smrg 1882848b8605Smrg LIST_INITHEAD(&cf->vtx); 1883848b8605Smrg 1884b8e80941Smrg LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) { 1885b8e80941Smrg free(gds); 1886b8e80941Smrg } 1887b8e80941Smrg 1888b8e80941Smrg LIST_INITHEAD(&cf->gds); 1889b8e80941Smrg 1890848b8605Smrg free(cf); 1891848b8605Smrg } 1892848b8605Smrg 1893848b8605Smrg LIST_INITHEAD(&cf->list); 1894848b8605Smrg} 1895848b8605Smrg 1896848b8605Smrgstatic int print_swizzle(unsigned swz) 1897848b8605Smrg{ 1898848b8605Smrg const char * swzchars = "xyzw01?_"; 1899848b8605Smrg assert(swz<8 && swz != 6); 1900848b8605Smrg return fprintf(stderr, "%c", swzchars[swz]); 1901848b8605Smrg} 1902848b8605Smrg 1903848b8605Smrgstatic int print_sel(unsigned sel, unsigned rel, unsigned index_mode, 1904848b8605Smrg unsigned need_brackets) 1905848b8605Smrg{ 1906848b8605Smrg int o = 0; 1907848b8605Smrg if (rel && index_mode >= 5 && sel < 128) 1908848b8605Smrg o += fprintf(stderr, "G"); 1909848b8605Smrg if (rel || need_brackets) { 1910848b8605Smrg o += fprintf(stderr, "["); 1911848b8605Smrg } 1912848b8605Smrg o += fprintf(stderr, "%d", sel); 1913848b8605Smrg if (rel) { 1914848b8605Smrg if (index_mode == 0 || index_mode == 6) 1915848b8605Smrg o += fprintf(stderr, "+AR"); 1916848b8605Smrg else if (index_mode == 4) 1917848b8605Smrg o += fprintf(stderr, "+AL"); 1918848b8605Smrg } 1919848b8605Smrg if (rel || need_brackets) { 1920848b8605Smrg o += fprintf(stderr, "]"); 1921848b8605Smrg } 1922848b8605Smrg return o; 1923848b8605Smrg} 1924848b8605Smrg 1925848b8605Smrgstatic int print_dst(struct r600_bytecode_alu *alu) 1926848b8605Smrg{ 1927848b8605Smrg int o = 0; 1928848b8605Smrg unsigned sel = alu->dst.sel; 1929848b8605Smrg char reg_char = 'R'; 1930848b8605Smrg if (sel > 128 - 4) { /* clause temporary gpr */ 1931848b8605Smrg sel -= 128 - 4; 1932848b8605Smrg reg_char = 'T'; 1933848b8605Smrg } 1934848b8605Smrg 1935b8e80941Smrg if (alu_writes(alu)) { 1936848b8605Smrg o += fprintf(stderr, "%c", reg_char); 1937848b8605Smrg o += print_sel(alu->dst.sel, alu->dst.rel, alu->index_mode, 0); 1938848b8605Smrg } else { 1939848b8605Smrg o += fprintf(stderr, "__"); 1940848b8605Smrg } 1941848b8605Smrg o += fprintf(stderr, "."); 1942848b8605Smrg o += print_swizzle(alu->dst.chan); 1943848b8605Smrg return o; 1944848b8605Smrg} 1945848b8605Smrg 1946848b8605Smrgstatic int print_src(struct r600_bytecode_alu *alu, unsigned idx) 1947848b8605Smrg{ 1948848b8605Smrg int o = 0; 1949848b8605Smrg struct r600_bytecode_alu_src *src = &alu->src[idx]; 1950848b8605Smrg unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0; 1951848b8605Smrg 1952848b8605Smrg if (src->neg) 1953848b8605Smrg o += fprintf(stderr,"-"); 1954848b8605Smrg if (src->abs) 1955848b8605Smrg o += fprintf(stderr,"|"); 1956848b8605Smrg 1957848b8605Smrg if (sel < 128 - 4) { 1958848b8605Smrg o += fprintf(stderr, "R"); 1959848b8605Smrg } else if (sel < 128) { 1960848b8605Smrg o += fprintf(stderr, "T"); 1961848b8605Smrg sel -= 128 - 4; 1962848b8605Smrg } else if (sel < 160) { 1963848b8605Smrg o += fprintf(stderr, "KC0"); 1964848b8605Smrg need_brackets = 1; 1965848b8605Smrg sel -= 128; 1966848b8605Smrg } else if (sel < 192) { 1967848b8605Smrg o += fprintf(stderr, "KC1"); 1968848b8605Smrg need_brackets = 1; 1969848b8605Smrg sel -= 160; 1970848b8605Smrg } else if (sel >= 512) { 1971848b8605Smrg o += fprintf(stderr, "C%d", src->kc_bank); 1972848b8605Smrg need_brackets = 1; 1973848b8605Smrg sel -= 512; 1974848b8605Smrg } else if (sel >= 448) { 1975848b8605Smrg o += fprintf(stderr, "Param"); 1976848b8605Smrg sel -= 448; 1977848b8605Smrg need_chan = 0; 1978848b8605Smrg } else if (sel >= 288) { 1979848b8605Smrg o += fprintf(stderr, "KC3"); 1980848b8605Smrg need_brackets = 1; 1981848b8605Smrg sel -= 288; 1982848b8605Smrg } else if (sel >= 256) { 1983848b8605Smrg o += fprintf(stderr, "KC2"); 1984848b8605Smrg need_brackets = 1; 1985848b8605Smrg sel -= 256; 1986848b8605Smrg } else { 1987848b8605Smrg need_sel = 0; 1988848b8605Smrg need_chan = 0; 1989848b8605Smrg switch (sel) { 1990b8e80941Smrg case EG_V_SQ_ALU_SRC_LDS_DIRECT_A: 1991b8e80941Smrg o += fprintf(stderr, "LDS_A[0x%08X]", src->value); 1992b8e80941Smrg break; 1993b8e80941Smrg case EG_V_SQ_ALU_SRC_LDS_DIRECT_B: 1994b8e80941Smrg o += fprintf(stderr, "LDS_B[0x%08X]", src->value); 1995b8e80941Smrg break; 1996b8e80941Smrg case EG_V_SQ_ALU_SRC_LDS_OQ_A: 1997b8e80941Smrg o += fprintf(stderr, "LDS_OQ_A"); 1998b8e80941Smrg need_chan = 1; 1999b8e80941Smrg break; 2000b8e80941Smrg case EG_V_SQ_ALU_SRC_LDS_OQ_B: 2001b8e80941Smrg o += fprintf(stderr, "LDS_OQ_B"); 2002b8e80941Smrg need_chan = 1; 2003b8e80941Smrg break; 2004b8e80941Smrg case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP: 2005b8e80941Smrg o += fprintf(stderr, "LDS_OQ_A_POP"); 2006b8e80941Smrg need_chan = 1; 2007b8e80941Smrg break; 2008b8e80941Smrg case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP: 2009b8e80941Smrg o += fprintf(stderr, "LDS_OQ_B_POP"); 2010b8e80941Smrg need_chan = 1; 2011b8e80941Smrg break; 2012b8e80941Smrg case EG_V_SQ_ALU_SRC_TIME_LO: 2013b8e80941Smrg o += fprintf(stderr, "TIME_LO"); 2014b8e80941Smrg break; 2015b8e80941Smrg case EG_V_SQ_ALU_SRC_TIME_HI: 2016b8e80941Smrg o += fprintf(stderr, "TIME_HI"); 2017b8e80941Smrg break; 2018b8e80941Smrg case EG_V_SQ_ALU_SRC_SE_ID: 2019b8e80941Smrg o += fprintf(stderr, "SE_ID"); 2020b8e80941Smrg break; 2021b8e80941Smrg case EG_V_SQ_ALU_SRC_SIMD_ID: 2022b8e80941Smrg o += fprintf(stderr, "SIMD_ID"); 2023b8e80941Smrg break; 2024b8e80941Smrg case EG_V_SQ_ALU_SRC_HW_WAVE_ID: 2025b8e80941Smrg o += fprintf(stderr, "HW_WAVE_ID"); 2026b8e80941Smrg break; 2027848b8605Smrg case V_SQ_ALU_SRC_PS: 2028848b8605Smrg o += fprintf(stderr, "PS"); 2029848b8605Smrg break; 2030848b8605Smrg case V_SQ_ALU_SRC_PV: 2031848b8605Smrg o += fprintf(stderr, "PV"); 2032848b8605Smrg need_chan = 1; 2033848b8605Smrg break; 2034848b8605Smrg case V_SQ_ALU_SRC_LITERAL: 2035b8e80941Smrg o += fprintf(stderr, "[0x%08X %f]", src->value, u_bitcast_u2f(src->value)); 2036848b8605Smrg break; 2037848b8605Smrg case V_SQ_ALU_SRC_0_5: 2038848b8605Smrg o += fprintf(stderr, "0.5"); 2039848b8605Smrg break; 2040848b8605Smrg case V_SQ_ALU_SRC_M_1_INT: 2041848b8605Smrg o += fprintf(stderr, "-1"); 2042848b8605Smrg break; 2043848b8605Smrg case V_SQ_ALU_SRC_1_INT: 2044848b8605Smrg o += fprintf(stderr, "1"); 2045848b8605Smrg break; 2046848b8605Smrg case V_SQ_ALU_SRC_1: 2047848b8605Smrg o += fprintf(stderr, "1.0"); 2048848b8605Smrg break; 2049848b8605Smrg case V_SQ_ALU_SRC_0: 2050848b8605Smrg o += fprintf(stderr, "0"); 2051848b8605Smrg break; 2052848b8605Smrg default: 2053848b8605Smrg o += fprintf(stderr, "??IMM_%d", sel); 2054848b8605Smrg break; 2055848b8605Smrg } 2056848b8605Smrg } 2057848b8605Smrg 2058848b8605Smrg if (need_sel) 2059848b8605Smrg o += print_sel(sel, src->rel, alu->index_mode, need_brackets); 2060848b8605Smrg 2061848b8605Smrg if (need_chan) { 2062848b8605Smrg o += fprintf(stderr, "."); 2063848b8605Smrg o += print_swizzle(src->chan); 2064848b8605Smrg } 2065848b8605Smrg 2066848b8605Smrg if (src->abs) 2067848b8605Smrg o += fprintf(stderr,"|"); 2068848b8605Smrg 2069848b8605Smrg return o; 2070848b8605Smrg} 2071848b8605Smrg 2072848b8605Smrgstatic int print_indent(int p, int c) 2073848b8605Smrg{ 2074848b8605Smrg int o = 0; 2075848b8605Smrg while (p++ < c) 2076848b8605Smrg o += fprintf(stderr, " "); 2077848b8605Smrg return o; 2078848b8605Smrg} 2079848b8605Smrg 2080848b8605Smrgvoid r600_bytecode_disasm(struct r600_bytecode *bc) 2081848b8605Smrg{ 2082b8e80941Smrg const char *index_mode[] = {"CF_INDEX_NONE", "CF_INDEX_0", "CF_INDEX_1"}; 2083848b8605Smrg static int index = 0; 2084848b8605Smrg struct r600_bytecode_cf *cf = NULL; 2085848b8605Smrg struct r600_bytecode_alu *alu = NULL; 2086848b8605Smrg struct r600_bytecode_vtx *vtx = NULL; 2087848b8605Smrg struct r600_bytecode_tex *tex = NULL; 2088b8e80941Smrg struct r600_bytecode_gds *gds = NULL; 2089848b8605Smrg 2090848b8605Smrg unsigned i, id, ngr = 0, last; 2091848b8605Smrg uint32_t literal[4]; 2092848b8605Smrg unsigned nliteral; 2093848b8605Smrg char chip = '6'; 2094848b8605Smrg 2095848b8605Smrg switch (bc->chip_class) { 2096848b8605Smrg case R700: 2097848b8605Smrg chip = '7'; 2098848b8605Smrg break; 2099848b8605Smrg case EVERGREEN: 2100848b8605Smrg chip = 'E'; 2101848b8605Smrg break; 2102848b8605Smrg case CAYMAN: 2103848b8605Smrg chip = 'C'; 2104848b8605Smrg break; 2105848b8605Smrg case R600: 2106848b8605Smrg default: 2107848b8605Smrg chip = '6'; 2108848b8605Smrg break; 2109848b8605Smrg } 2110848b8605Smrg fprintf(stderr, "bytecode %d dw -- %d gprs -- %d nstack -------------\n", 2111848b8605Smrg bc->ndw, bc->ngpr, bc->nstack); 2112848b8605Smrg fprintf(stderr, "shader %d -- %c\n", index++, chip); 2113848b8605Smrg 2114848b8605Smrg LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 2115848b8605Smrg id = cf->id; 2116848b8605Smrg if (cf->op == CF_NATIVE) { 2117848b8605Smrg fprintf(stderr, "%04d %08X %08X CF_NATIVE\n", id, bc->bytecode[id], 2118848b8605Smrg bc->bytecode[id + 1]); 2119848b8605Smrg } else { 2120848b8605Smrg const struct cf_op_info *cfop = r600_isa_cf(cf->op); 2121848b8605Smrg if (cfop->flags & CF_ALU) { 2122848b8605Smrg if (cf->eg_alu_extended) { 2123848b8605Smrg fprintf(stderr, "%04d %08X %08X %s\n", id, bc->bytecode[id], 2124848b8605Smrg bc->bytecode[id + 1], "ALU_EXT"); 2125848b8605Smrg id += 2; 2126848b8605Smrg } 2127848b8605Smrg fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id], 2128848b8605Smrg bc->bytecode[id + 1], cfop->name); 2129848b8605Smrg fprintf(stderr, "%d @%d ", cf->ndw / 2, cf->addr); 2130848b8605Smrg for (i = 0; i < 4; ++i) { 2131848b8605Smrg if (cf->kcache[i].mode) { 2132848b8605Smrg int c_start = (cf->kcache[i].addr << 4); 2133848b8605Smrg int c_end = c_start + (cf->kcache[i].mode << 4); 2134b8e80941Smrg fprintf(stderr, "KC%d[CB%d:%d-%d%s%s] ", 2135b8e80941Smrg i, cf->kcache[i].bank, c_start, c_end, 2136b8e80941Smrg cf->kcache[i].index_mode ? " " : "", 2137b8e80941Smrg cf->kcache[i].index_mode ? index_mode[cf->kcache[i].index_mode] : ""); 2138848b8605Smrg } 2139848b8605Smrg } 2140848b8605Smrg fprintf(stderr, "\n"); 2141848b8605Smrg } else if (cfop->flags & CF_FETCH) { 2142848b8605Smrg fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id], 2143848b8605Smrg bc->bytecode[id + 1], cfop->name); 2144848b8605Smrg fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr); 2145b8e80941Smrg if (cf->vpm) 2146b8e80941Smrg fprintf(stderr, "VPM "); 2147b8e80941Smrg if (cf->end_of_program) 2148b8e80941Smrg fprintf(stderr, "EOP "); 2149848b8605Smrg fprintf(stderr, "\n"); 2150b8e80941Smrg 2151848b8605Smrg } else if (cfop->flags & CF_EXP) { 2152848b8605Smrg int o = 0; 2153848b8605Smrg const char *exp_type[] = {"PIXEL", "POS ", "PARAM"}; 2154848b8605Smrg o += fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id], 2155848b8605Smrg bc->bytecode[id + 1], cfop->name); 2156848b8605Smrg o += print_indent(o, 43); 2157848b8605Smrg o += fprintf(stderr, "%s ", exp_type[cf->output.type]); 2158848b8605Smrg if (cf->output.burst_count > 1) { 2159848b8605Smrg o += fprintf(stderr, "%d-%d ", cf->output.array_base, 2160848b8605Smrg cf->output.array_base + cf->output.burst_count - 1); 2161848b8605Smrg 2162848b8605Smrg o += print_indent(o, 55); 2163848b8605Smrg o += fprintf(stderr, "R%d-%d.", cf->output.gpr, 2164848b8605Smrg cf->output.gpr + cf->output.burst_count - 1); 2165848b8605Smrg } else { 2166848b8605Smrg o += fprintf(stderr, "%d ", cf->output.array_base); 2167848b8605Smrg o += print_indent(o, 55); 2168848b8605Smrg o += fprintf(stderr, "R%d.", cf->output.gpr); 2169848b8605Smrg } 2170848b8605Smrg 2171848b8605Smrg o += print_swizzle(cf->output.swizzle_x); 2172848b8605Smrg o += print_swizzle(cf->output.swizzle_y); 2173848b8605Smrg o += print_swizzle(cf->output.swizzle_z); 2174848b8605Smrg o += print_swizzle(cf->output.swizzle_w); 2175848b8605Smrg 2176848b8605Smrg print_indent(o, 67); 2177848b8605Smrg 2178848b8605Smrg fprintf(stderr, " ES:%X ", cf->output.elem_size); 2179b8e80941Smrg if (cf->mark) 2180b8e80941Smrg fprintf(stderr, "MARK "); 2181848b8605Smrg if (!cf->barrier) 2182848b8605Smrg fprintf(stderr, "NO_BARRIER "); 2183848b8605Smrg if (cf->end_of_program) 2184848b8605Smrg fprintf(stderr, "EOP "); 2185848b8605Smrg fprintf(stderr, "\n"); 2186848b8605Smrg } else if (r600_isa_cf(cf->op)->flags & CF_MEM) { 2187848b8605Smrg int o = 0; 2188848b8605Smrg const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", 2189848b8605Smrg "WRITE_IND_ACK"}; 2190848b8605Smrg o += fprintf(stderr, "%04d %08X %08X %s ", id, 2191848b8605Smrg bc->bytecode[id], bc->bytecode[id + 1], cfop->name); 2192848b8605Smrg o += print_indent(o, 43); 2193848b8605Smrg o += fprintf(stderr, "%s ", exp_type[cf->output.type]); 2194b8e80941Smrg 2195b8e80941Smrg if (r600_isa_cf(cf->op)->flags & CF_RAT) { 2196b8e80941Smrg o += fprintf(stderr, "RAT%d", cf->rat.id); 2197b8e80941Smrg if (cf->rat.index_mode) { 2198b8e80941Smrg o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1); 2199b8e80941Smrg } 2200b8e80941Smrg o += fprintf(stderr, " INST: %d ", cf->rat.inst); 2201b8e80941Smrg } 2202b8e80941Smrg 2203848b8605Smrg if (cf->output.burst_count > 1) { 2204848b8605Smrg o += fprintf(stderr, "%d-%d ", cf->output.array_base, 2205848b8605Smrg cf->output.array_base + cf->output.burst_count - 1); 2206848b8605Smrg o += print_indent(o, 55); 2207848b8605Smrg o += fprintf(stderr, "R%d-%d.", cf->output.gpr, 2208848b8605Smrg cf->output.gpr + cf->output.burst_count - 1); 2209848b8605Smrg } else { 2210848b8605Smrg o += fprintf(stderr, "%d ", cf->output.array_base); 2211848b8605Smrg o += print_indent(o, 55); 2212848b8605Smrg o += fprintf(stderr, "R%d.", cf->output.gpr); 2213848b8605Smrg } 2214848b8605Smrg for (i = 0; i < 4; ++i) { 2215848b8605Smrg if (cf->output.comp_mask & (1 << i)) 2216848b8605Smrg o += print_swizzle(i); 2217848b8605Smrg else 2218848b8605Smrg o += print_swizzle(7); 2219848b8605Smrg } 2220848b8605Smrg 2221b8e80941Smrg if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND || 2222b8e80941Smrg cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND) 2223848b8605Smrg o += fprintf(stderr, " R%d", cf->output.index_gpr); 2224848b8605Smrg 2225848b8605Smrg o += print_indent(o, 67); 2226848b8605Smrg 2227848b8605Smrg fprintf(stderr, " ES:%i ", cf->output.elem_size); 2228848b8605Smrg if (cf->output.array_size != 0xFFF) 2229848b8605Smrg fprintf(stderr, "AS:%i ", cf->output.array_size); 2230b8e80941Smrg if (cf->mark) 2231b8e80941Smrg fprintf(stderr, "MARK "); 2232848b8605Smrg if (!cf->barrier) 2233848b8605Smrg fprintf(stderr, "NO_BARRIER "); 2234848b8605Smrg if (cf->end_of_program) 2235848b8605Smrg fprintf(stderr, "EOP "); 2236b8e80941Smrg 2237b8e80941Smrg if (cf->output.mark) 2238b8e80941Smrg fprintf(stderr, "MARK "); 2239b8e80941Smrg 2240848b8605Smrg fprintf(stderr, "\n"); 2241848b8605Smrg } else { 2242848b8605Smrg fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id], 2243848b8605Smrg bc->bytecode[id + 1], cfop->name); 2244848b8605Smrg fprintf(stderr, "@%d ", cf->cf_addr); 2245848b8605Smrg if (cf->cond) 2246848b8605Smrg fprintf(stderr, "CND:%X ", cf->cond); 2247848b8605Smrg if (cf->pop_count) 2248848b8605Smrg fprintf(stderr, "POP:%X ", cf->pop_count); 2249b8e80941Smrg if (cf->count && (cfop->flags & CF_EMIT)) 2250b8e80941Smrg fprintf(stderr, "STREAM%d ", cf->count); 2251b8e80941Smrg if (cf->vpm) 2252b8e80941Smrg fprintf(stderr, "VPM "); 2253b8e80941Smrg if (cf->end_of_program) 2254b8e80941Smrg fprintf(stderr, "EOP "); 2255848b8605Smrg fprintf(stderr, "\n"); 2256848b8605Smrg } 2257848b8605Smrg } 2258848b8605Smrg 2259848b8605Smrg id = cf->addr; 2260848b8605Smrg nliteral = 0; 2261848b8605Smrg last = 1; 2262848b8605Smrg LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { 2263848b8605Smrg const char *omod_str[] = {"","*2","*4","/2"}; 2264848b8605Smrg const struct alu_op_info *aop = r600_isa_alu(alu->op); 2265848b8605Smrg int o = 0; 2266848b8605Smrg 2267b8e80941Smrg r600_bytecode_alu_nliterals(alu, literal, &nliteral); 2268848b8605Smrg o += fprintf(stderr, " %04d %08X %08X ", id, bc->bytecode[id], bc->bytecode[id+1]); 2269848b8605Smrg if (last) 2270848b8605Smrg o += fprintf(stderr, "%4d ", ++ngr); 2271848b8605Smrg else 2272848b8605Smrg o += fprintf(stderr, " "); 2273848b8605Smrg o += fprintf(stderr, "%c%c %c ", alu->execute_mask ? 'M':' ', 2274848b8605Smrg alu->update_pred ? 'P':' ', 2275848b8605Smrg alu->pred_sel ? alu->pred_sel==2 ? '0':'1':' '); 2276848b8605Smrg 2277848b8605Smrg o += fprintf(stderr, "%s%s%s ", aop->name, 2278848b8605Smrg omod_str[alu->omod], alu->dst.clamp ? "_sat":""); 2279848b8605Smrg 2280848b8605Smrg o += print_indent(o,60); 2281848b8605Smrg o += print_dst(alu); 2282848b8605Smrg for (i = 0; i < aop->src_count; ++i) { 2283848b8605Smrg o += fprintf(stderr, i == 0 ? ", ": ", "); 2284848b8605Smrg o += print_src(alu, i); 2285848b8605Smrg } 2286848b8605Smrg 2287848b8605Smrg if (alu->bank_swizzle) { 2288848b8605Smrg o += print_indent(o,75); 2289848b8605Smrg o += fprintf(stderr, " BS:%d", alu->bank_swizzle); 2290848b8605Smrg } 2291848b8605Smrg 2292848b8605Smrg fprintf(stderr, "\n"); 2293848b8605Smrg id += 2; 2294848b8605Smrg 2295848b8605Smrg if (alu->last) { 2296848b8605Smrg for (i = 0; i < nliteral; i++, id++) { 2297848b8605Smrg float *f = (float*)(bc->bytecode + id); 2298848b8605Smrg o = fprintf(stderr, " %04d %08X", id, bc->bytecode[id]); 2299848b8605Smrg print_indent(o, 60); 2300848b8605Smrg fprintf(stderr, " %f (%d)\n", *f, *(bc->bytecode + id)); 2301848b8605Smrg } 2302848b8605Smrg id += nliteral & 1; 2303848b8605Smrg nliteral = 0; 2304848b8605Smrg } 2305848b8605Smrg last = alu->last; 2306848b8605Smrg } 2307848b8605Smrg 2308848b8605Smrg LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { 2309848b8605Smrg int o = 0; 2310848b8605Smrg o += fprintf(stderr, " %04d %08X %08X %08X ", id, bc->bytecode[id], 2311848b8605Smrg bc->bytecode[id + 1], bc->bytecode[id + 2]); 2312848b8605Smrg 2313848b8605Smrg o += fprintf(stderr, "%s ", r600_isa_fetch(tex->op)->name); 2314848b8605Smrg 2315848b8605Smrg o += print_indent(o, 50); 2316848b8605Smrg 2317848b8605Smrg o += fprintf(stderr, "R%d.", tex->dst_gpr); 2318848b8605Smrg o += print_swizzle(tex->dst_sel_x); 2319848b8605Smrg o += print_swizzle(tex->dst_sel_y); 2320848b8605Smrg o += print_swizzle(tex->dst_sel_z); 2321848b8605Smrg o += print_swizzle(tex->dst_sel_w); 2322848b8605Smrg 2323848b8605Smrg o += fprintf(stderr, ", R%d.", tex->src_gpr); 2324848b8605Smrg o += print_swizzle(tex->src_sel_x); 2325848b8605Smrg o += print_swizzle(tex->src_sel_y); 2326848b8605Smrg o += print_swizzle(tex->src_sel_z); 2327848b8605Smrg o += print_swizzle(tex->src_sel_w); 2328848b8605Smrg 2329848b8605Smrg o += fprintf(stderr, ", RID:%d", tex->resource_id); 2330848b8605Smrg o += fprintf(stderr, ", SID:%d ", tex->sampler_id); 2331848b8605Smrg 2332b8e80941Smrg if (tex->sampler_index_mode) 2333b8e80941Smrg fprintf(stderr, "SQ_%s ", index_mode[tex->sampler_index_mode]); 2334b8e80941Smrg 2335848b8605Smrg if (tex->lod_bias) 2336848b8605Smrg fprintf(stderr, "LB:%d ", tex->lod_bias); 2337848b8605Smrg 2338848b8605Smrg fprintf(stderr, "CT:%c%c%c%c ", 2339848b8605Smrg tex->coord_type_x ? 'N' : 'U', 2340848b8605Smrg tex->coord_type_y ? 'N' : 'U', 2341848b8605Smrg tex->coord_type_z ? 'N' : 'U', 2342848b8605Smrg tex->coord_type_w ? 'N' : 'U'); 2343848b8605Smrg 2344848b8605Smrg if (tex->offset_x) 2345848b8605Smrg fprintf(stderr, "OX:%d ", tex->offset_x); 2346848b8605Smrg if (tex->offset_y) 2347848b8605Smrg fprintf(stderr, "OY:%d ", tex->offset_y); 2348848b8605Smrg if (tex->offset_z) 2349848b8605Smrg fprintf(stderr, "OZ:%d ", tex->offset_z); 2350848b8605Smrg 2351848b8605Smrg id += 4; 2352848b8605Smrg fprintf(stderr, "\n"); 2353848b8605Smrg } 2354848b8605Smrg 2355848b8605Smrg LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 2356848b8605Smrg int o = 0; 2357848b8605Smrg const char * fetch_type[] = {"VERTEX", "INSTANCE", ""}; 2358848b8605Smrg o += fprintf(stderr, " %04d %08X %08X %08X ", id, bc->bytecode[id], 2359848b8605Smrg bc->bytecode[id + 1], bc->bytecode[id + 2]); 2360848b8605Smrg 2361848b8605Smrg o += fprintf(stderr, "%s ", r600_isa_fetch(vtx->op)->name); 2362848b8605Smrg 2363848b8605Smrg o += print_indent(o, 50); 2364848b8605Smrg 2365848b8605Smrg o += fprintf(stderr, "R%d.", vtx->dst_gpr); 2366848b8605Smrg o += print_swizzle(vtx->dst_sel_x); 2367848b8605Smrg o += print_swizzle(vtx->dst_sel_y); 2368848b8605Smrg o += print_swizzle(vtx->dst_sel_z); 2369848b8605Smrg o += print_swizzle(vtx->dst_sel_w); 2370848b8605Smrg 2371848b8605Smrg o += fprintf(stderr, ", R%d.", vtx->src_gpr); 2372848b8605Smrg o += print_swizzle(vtx->src_sel_x); 2373b8e80941Smrg if (r600_isa_fetch(vtx->op)->flags & FF_MEM) 2374b8e80941Smrg o += print_swizzle(vtx->src_sel_y); 2375848b8605Smrg 2376848b8605Smrg if (vtx->offset) 2377848b8605Smrg fprintf(stderr, " +%db", vtx->offset); 2378848b8605Smrg 2379848b8605Smrg o += print_indent(o, 55); 2380848b8605Smrg 2381848b8605Smrg fprintf(stderr, ", RID:%d ", vtx->buffer_id); 2382848b8605Smrg 2383848b8605Smrg fprintf(stderr, "%s ", fetch_type[vtx->fetch_type]); 2384848b8605Smrg 2385848b8605Smrg if (bc->chip_class < CAYMAN && vtx->mega_fetch_count) 2386848b8605Smrg fprintf(stderr, "MFC:%d ", vtx->mega_fetch_count); 2387848b8605Smrg 2388b8e80941Smrg if (bc->chip_class >= EVERGREEN && vtx->buffer_index_mode) 2389b8e80941Smrg fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]); 2390b8e80941Smrg 2391b8e80941Smrg if (r600_isa_fetch(vtx->op)->flags & FF_MEM) { 2392b8e80941Smrg if (vtx->uncached) 2393b8e80941Smrg fprintf(stderr, "UNCACHED "); 2394b8e80941Smrg if (vtx->indexed) 2395b8e80941Smrg fprintf(stderr, "INDEXED:%d ", vtx->indexed); 2396b8e80941Smrg 2397b8e80941Smrg fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size); 2398b8e80941Smrg if (vtx->burst_count) 2399b8e80941Smrg fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count); 2400b8e80941Smrg fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base); 2401b8e80941Smrg fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size); 2402b8e80941Smrg } 2403b8e80941Smrg 2404848b8605Smrg fprintf(stderr, "UCF:%d ", vtx->use_const_fields); 2405848b8605Smrg fprintf(stderr, "FMT(DTA:%d ", vtx->data_format); 2406848b8605Smrg fprintf(stderr, "NUM:%d ", vtx->num_format_all); 2407848b8605Smrg fprintf(stderr, "COMP:%d ", vtx->format_comp_all); 2408848b8605Smrg fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); 2409848b8605Smrg 2410848b8605Smrg id += 4; 2411848b8605Smrg } 2412b8e80941Smrg 2413b8e80941Smrg LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) { 2414b8e80941Smrg int o = 0; 2415b8e80941Smrg o += fprintf(stderr, " %04d %08X %08X %08X ", id, bc->bytecode[id], 2416b8e80941Smrg bc->bytecode[id + 1], bc->bytecode[id + 2]); 2417b8e80941Smrg 2418b8e80941Smrg o += fprintf(stderr, "%s ", r600_isa_fetch(gds->op)->name); 2419b8e80941Smrg 2420b8e80941Smrg if (gds->op != FETCH_OP_TF_WRITE) { 2421b8e80941Smrg o += fprintf(stderr, "R%d.", gds->dst_gpr); 2422b8e80941Smrg o += print_swizzle(gds->dst_sel_x); 2423b8e80941Smrg o += print_swizzle(gds->dst_sel_y); 2424b8e80941Smrg o += print_swizzle(gds->dst_sel_z); 2425b8e80941Smrg o += print_swizzle(gds->dst_sel_w); 2426b8e80941Smrg } 2427b8e80941Smrg 2428b8e80941Smrg o += fprintf(stderr, ", R%d.", gds->src_gpr); 2429b8e80941Smrg o += print_swizzle(gds->src_sel_x); 2430b8e80941Smrg o += print_swizzle(gds->src_sel_y); 2431b8e80941Smrg o += print_swizzle(gds->src_sel_z); 2432b8e80941Smrg 2433b8e80941Smrg if (gds->op != FETCH_OP_TF_WRITE) { 2434b8e80941Smrg o += fprintf(stderr, ", R%d.", gds->src_gpr2); 2435b8e80941Smrg } 2436b8e80941Smrg if (gds->alloc_consume) { 2437b8e80941Smrg o += fprintf(stderr, " UAV: %d", gds->uav_id); 2438b8e80941Smrg if (gds->uav_index_mode) 2439b8e80941Smrg o += fprintf(stderr, "[%s]", index_mode[gds->uav_index_mode]); 2440b8e80941Smrg } 2441b8e80941Smrg fprintf(stderr, "\n"); 2442b8e80941Smrg id += 4; 2443b8e80941Smrg } 2444848b8605Smrg } 2445848b8605Smrg 2446848b8605Smrg fprintf(stderr, "--------------------------------------\n"); 2447848b8605Smrg} 2448848b8605Smrg 2449848b8605Smrgvoid r600_vertex_data_type(enum pipe_format pformat, 2450848b8605Smrg unsigned *format, 2451848b8605Smrg unsigned *num_format, unsigned *format_comp, unsigned *endian) 2452848b8605Smrg{ 2453848b8605Smrg const struct util_format_description *desc; 2454848b8605Smrg unsigned i; 2455848b8605Smrg 2456848b8605Smrg *format = 0; 2457848b8605Smrg *num_format = 0; 2458848b8605Smrg *format_comp = 0; 2459848b8605Smrg *endian = ENDIAN_NONE; 2460848b8605Smrg 2461848b8605Smrg if (pformat == PIPE_FORMAT_R11G11B10_FLOAT) { 2462848b8605Smrg *format = FMT_10_11_11_FLOAT; 2463848b8605Smrg *endian = r600_endian_swap(32); 2464848b8605Smrg return; 2465848b8605Smrg } 2466848b8605Smrg 2467b8e80941Smrg if (pformat == PIPE_FORMAT_B5G6R5_UNORM) { 2468b8e80941Smrg *format = FMT_5_6_5; 2469b8e80941Smrg *endian = r600_endian_swap(16); 2470b8e80941Smrg return; 2471b8e80941Smrg } 2472b8e80941Smrg 2473b8e80941Smrg if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) { 2474b8e80941Smrg *format = FMT_1_5_5_5; 2475b8e80941Smrg *endian = r600_endian_swap(16); 2476b8e80941Smrg return; 2477b8e80941Smrg } 2478b8e80941Smrg 2479b8e80941Smrg if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) { 2480b8e80941Smrg *format = FMT_5_5_5_1; 2481b8e80941Smrg return; 2482b8e80941Smrg } 2483b8e80941Smrg 2484848b8605Smrg desc = util_format_description(pformat); 2485848b8605Smrg if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { 2486848b8605Smrg goto out_unknown; 2487848b8605Smrg } 2488848b8605Smrg 2489848b8605Smrg /* Find the first non-VOID channel. */ 2490848b8605Smrg for (i = 0; i < 4; i++) { 2491848b8605Smrg if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 2492848b8605Smrg break; 2493848b8605Smrg } 2494848b8605Smrg } 2495848b8605Smrg 2496848b8605Smrg *endian = r600_endian_swap(desc->channel[i].size); 2497848b8605Smrg 2498848b8605Smrg switch (desc->channel[i].type) { 2499848b8605Smrg /* Half-floats, floats, ints */ 2500848b8605Smrg case UTIL_FORMAT_TYPE_FLOAT: 2501848b8605Smrg switch (desc->channel[i].size) { 2502848b8605Smrg case 16: 2503848b8605Smrg switch (desc->nr_channels) { 2504848b8605Smrg case 1: 2505848b8605Smrg *format = FMT_16_FLOAT; 2506848b8605Smrg break; 2507848b8605Smrg case 2: 2508848b8605Smrg *format = FMT_16_16_FLOAT; 2509848b8605Smrg break; 2510848b8605Smrg case 3: 2511848b8605Smrg case 4: 2512848b8605Smrg *format = FMT_16_16_16_16_FLOAT; 2513848b8605Smrg break; 2514848b8605Smrg } 2515848b8605Smrg break; 2516848b8605Smrg case 32: 2517848b8605Smrg switch (desc->nr_channels) { 2518848b8605Smrg case 1: 2519848b8605Smrg *format = FMT_32_FLOAT; 2520848b8605Smrg break; 2521848b8605Smrg case 2: 2522848b8605Smrg *format = FMT_32_32_FLOAT; 2523848b8605Smrg break; 2524848b8605Smrg case 3: 2525848b8605Smrg *format = FMT_32_32_32_FLOAT; 2526848b8605Smrg break; 2527848b8605Smrg case 4: 2528848b8605Smrg *format = FMT_32_32_32_32_FLOAT; 2529848b8605Smrg break; 2530848b8605Smrg } 2531848b8605Smrg break; 2532848b8605Smrg default: 2533848b8605Smrg goto out_unknown; 2534848b8605Smrg } 2535848b8605Smrg break; 2536848b8605Smrg /* Unsigned ints */ 2537848b8605Smrg case UTIL_FORMAT_TYPE_UNSIGNED: 2538848b8605Smrg /* Signed ints */ 2539848b8605Smrg case UTIL_FORMAT_TYPE_SIGNED: 2540848b8605Smrg switch (desc->channel[i].size) { 2541b8e80941Smrg case 4: 2542b8e80941Smrg switch (desc->nr_channels) { 2543b8e80941Smrg case 2: 2544b8e80941Smrg *format = FMT_4_4; 2545b8e80941Smrg break; 2546b8e80941Smrg case 4: 2547b8e80941Smrg *format = FMT_4_4_4_4; 2548b8e80941Smrg break; 2549b8e80941Smrg } 2550b8e80941Smrg break; 2551848b8605Smrg case 8: 2552848b8605Smrg switch (desc->nr_channels) { 2553848b8605Smrg case 1: 2554848b8605Smrg *format = FMT_8; 2555848b8605Smrg break; 2556848b8605Smrg case 2: 2557848b8605Smrg *format = FMT_8_8; 2558848b8605Smrg break; 2559848b8605Smrg case 3: 2560848b8605Smrg case 4: 2561848b8605Smrg *format = FMT_8_8_8_8; 2562848b8605Smrg break; 2563848b8605Smrg } 2564848b8605Smrg break; 2565848b8605Smrg case 10: 2566848b8605Smrg if (desc->nr_channels != 4) 2567848b8605Smrg goto out_unknown; 2568848b8605Smrg 2569848b8605Smrg *format = FMT_2_10_10_10; 2570848b8605Smrg break; 2571848b8605Smrg case 16: 2572848b8605Smrg switch (desc->nr_channels) { 2573848b8605Smrg case 1: 2574848b8605Smrg *format = FMT_16; 2575848b8605Smrg break; 2576848b8605Smrg case 2: 2577848b8605Smrg *format = FMT_16_16; 2578848b8605Smrg break; 2579848b8605Smrg case 3: 2580848b8605Smrg case 4: 2581848b8605Smrg *format = FMT_16_16_16_16; 2582848b8605Smrg break; 2583848b8605Smrg } 2584848b8605Smrg break; 2585848b8605Smrg case 32: 2586848b8605Smrg switch (desc->nr_channels) { 2587848b8605Smrg case 1: 2588848b8605Smrg *format = FMT_32; 2589848b8605Smrg break; 2590848b8605Smrg case 2: 2591848b8605Smrg *format = FMT_32_32; 2592848b8605Smrg break; 2593848b8605Smrg case 3: 2594848b8605Smrg *format = FMT_32_32_32; 2595848b8605Smrg break; 2596848b8605Smrg case 4: 2597848b8605Smrg *format = FMT_32_32_32_32; 2598848b8605Smrg break; 2599848b8605Smrg } 2600848b8605Smrg break; 2601848b8605Smrg default: 2602848b8605Smrg goto out_unknown; 2603848b8605Smrg } 2604848b8605Smrg break; 2605848b8605Smrg default: 2606848b8605Smrg goto out_unknown; 2607848b8605Smrg } 2608848b8605Smrg 2609848b8605Smrg if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2610848b8605Smrg *format_comp = 1; 2611848b8605Smrg } 2612848b8605Smrg 2613848b8605Smrg *num_format = 0; 2614848b8605Smrg if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || 2615848b8605Smrg desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2616848b8605Smrg if (!desc->channel[i].normalized) { 2617848b8605Smrg if (desc->channel[i].pure_integer) 2618848b8605Smrg *num_format = 1; 2619848b8605Smrg else 2620848b8605Smrg *num_format = 2; 2621848b8605Smrg } 2622848b8605Smrg } 2623848b8605Smrg return; 2624848b8605Smrgout_unknown: 2625848b8605Smrg R600_ERR("unsupported vertex format %s\n", util_format_name(pformat)); 2626848b8605Smrg} 2627848b8605Smrg 2628848b8605Smrgvoid *r600_create_vertex_fetch_shader(struct pipe_context *ctx, 2629848b8605Smrg unsigned count, 2630848b8605Smrg const struct pipe_vertex_element *elements) 2631848b8605Smrg{ 2632848b8605Smrg struct r600_context *rctx = (struct r600_context *)ctx; 2633848b8605Smrg struct r600_bytecode bc; 2634848b8605Smrg struct r600_bytecode_vtx vtx; 2635848b8605Smrg const struct util_format_description *desc; 2636848b8605Smrg unsigned fetch_resource_start = rctx->b.chip_class >= EVERGREEN ? 0 : 160; 2637848b8605Smrg unsigned format, num_format, format_comp, endian; 2638848b8605Smrg uint32_t *bytecode; 2639848b8605Smrg int i, j, r, fs_size; 2640848b8605Smrg struct r600_fetch_shader *shader; 2641848b8605Smrg unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB; 2642848b8605Smrg unsigned sb_disasm = !no_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); 2643848b8605Smrg 2644848b8605Smrg assert(count < 32); 2645848b8605Smrg 2646848b8605Smrg memset(&bc, 0, sizeof(bc)); 2647848b8605Smrg r600_bytecode_init(&bc, rctx->b.chip_class, rctx->b.family, 2648848b8605Smrg rctx->screen->has_compressed_msaa_texturing); 2649848b8605Smrg 2650848b8605Smrg bc.isa = rctx->isa; 2651848b8605Smrg 2652848b8605Smrg for (i = 0; i < count; i++) { 2653848b8605Smrg if (elements[i].instance_divisor > 1) { 2654848b8605Smrg if (rctx->b.chip_class == CAYMAN) { 2655848b8605Smrg for (j = 0; j < 4; j++) { 2656848b8605Smrg struct r600_bytecode_alu alu; 2657848b8605Smrg memset(&alu, 0, sizeof(alu)); 2658848b8605Smrg alu.op = ALU_OP2_MULHI_UINT; 2659848b8605Smrg alu.src[0].sel = 0; 2660848b8605Smrg alu.src[0].chan = 3; 2661848b8605Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2662848b8605Smrg alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; 2663848b8605Smrg alu.dst.sel = i + 1; 2664848b8605Smrg alu.dst.chan = j; 2665848b8605Smrg alu.dst.write = j == 3; 2666848b8605Smrg alu.last = j == 3; 2667848b8605Smrg if ((r = r600_bytecode_add_alu(&bc, &alu))) { 2668848b8605Smrg r600_bytecode_clear(&bc); 2669848b8605Smrg return NULL; 2670848b8605Smrg } 2671848b8605Smrg } 2672848b8605Smrg } else { 2673848b8605Smrg struct r600_bytecode_alu alu; 2674848b8605Smrg memset(&alu, 0, sizeof(alu)); 2675848b8605Smrg alu.op = ALU_OP2_MULHI_UINT; 2676848b8605Smrg alu.src[0].sel = 0; 2677848b8605Smrg alu.src[0].chan = 3; 2678848b8605Smrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2679848b8605Smrg alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; 2680848b8605Smrg alu.dst.sel = i + 1; 2681848b8605Smrg alu.dst.chan = 3; 2682848b8605Smrg alu.dst.write = 1; 2683848b8605Smrg alu.last = 1; 2684848b8605Smrg if ((r = r600_bytecode_add_alu(&bc, &alu))) { 2685848b8605Smrg r600_bytecode_clear(&bc); 2686848b8605Smrg return NULL; 2687848b8605Smrg } 2688848b8605Smrg } 2689848b8605Smrg } 2690848b8605Smrg } 2691848b8605Smrg 2692848b8605Smrg for (i = 0; i < count; i++) { 2693848b8605Smrg r600_vertex_data_type(elements[i].src_format, 2694848b8605Smrg &format, &num_format, &format_comp, &endian); 2695848b8605Smrg 2696848b8605Smrg desc = util_format_description(elements[i].src_format); 2697b8e80941Smrg if (!desc) { 2698848b8605Smrg r600_bytecode_clear(&bc); 2699848b8605Smrg R600_ERR("unknown format %d\n", elements[i].src_format); 2700848b8605Smrg return NULL; 2701848b8605Smrg } 2702848b8605Smrg 2703848b8605Smrg if (elements[i].src_offset > 65535) { 2704848b8605Smrg r600_bytecode_clear(&bc); 2705848b8605Smrg R600_ERR("too big src_offset: %u\n", elements[i].src_offset); 2706848b8605Smrg return NULL; 2707848b8605Smrg } 2708848b8605Smrg 2709848b8605Smrg memset(&vtx, 0, sizeof(vtx)); 2710848b8605Smrg vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start; 2711b8e80941Smrg vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA; 2712848b8605Smrg vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; 2713848b8605Smrg vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; 2714848b8605Smrg vtx.mega_fetch_count = 0x1F; 2715848b8605Smrg vtx.dst_gpr = i + 1; 2716848b8605Smrg vtx.dst_sel_x = desc->swizzle[0]; 2717848b8605Smrg vtx.dst_sel_y = desc->swizzle[1]; 2718848b8605Smrg vtx.dst_sel_z = desc->swizzle[2]; 2719848b8605Smrg vtx.dst_sel_w = desc->swizzle[3]; 2720848b8605Smrg vtx.data_format = format; 2721848b8605Smrg vtx.num_format_all = num_format; 2722848b8605Smrg vtx.format_comp_all = format_comp; 2723848b8605Smrg vtx.offset = elements[i].src_offset; 2724848b8605Smrg vtx.endian = endian; 2725848b8605Smrg 2726848b8605Smrg if ((r = r600_bytecode_add_vtx(&bc, &vtx))) { 2727848b8605Smrg r600_bytecode_clear(&bc); 2728848b8605Smrg return NULL; 2729848b8605Smrg } 2730848b8605Smrg } 2731848b8605Smrg 2732848b8605Smrg r600_bytecode_add_cfinst(&bc, CF_OP_RET); 2733848b8605Smrg 2734848b8605Smrg if ((r = r600_bytecode_build(&bc))) { 2735848b8605Smrg r600_bytecode_clear(&bc); 2736848b8605Smrg return NULL; 2737848b8605Smrg } 2738848b8605Smrg 2739848b8605Smrg if (rctx->screen->b.debug_flags & DBG_FS) { 2740848b8605Smrg fprintf(stderr, "--------------------------------------------------------------\n"); 2741848b8605Smrg fprintf(stderr, "Vertex elements state:\n"); 2742848b8605Smrg for (i = 0; i < count; i++) { 2743848b8605Smrg fprintf(stderr, " "); 2744848b8605Smrg util_dump_vertex_element(stderr, elements+i); 2745848b8605Smrg fprintf(stderr, "\n"); 2746848b8605Smrg } 2747848b8605Smrg 2748848b8605Smrg if (!sb_disasm) { 2749848b8605Smrg r600_bytecode_disasm(&bc); 2750848b8605Smrg 2751848b8605Smrg fprintf(stderr, "______________________________________________________________\n"); 2752848b8605Smrg } else { 2753848b8605Smrg r600_sb_bytecode_process(rctx, &bc, NULL, 1 /*dump*/, 0 /*optimize*/); 2754848b8605Smrg } 2755848b8605Smrg } 2756848b8605Smrg 2757848b8605Smrg fs_size = bc.ndw*4; 2758848b8605Smrg 2759848b8605Smrg /* Allocate the CSO. */ 2760848b8605Smrg shader = CALLOC_STRUCT(r600_fetch_shader); 2761848b8605Smrg if (!shader) { 2762848b8605Smrg r600_bytecode_clear(&bc); 2763848b8605Smrg return NULL; 2764848b8605Smrg } 2765848b8605Smrg 2766b8e80941Smrg u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, 256, 2767b8e80941Smrg &shader->offset, 2768848b8605Smrg (struct pipe_resource**)&shader->buffer); 2769848b8605Smrg if (!shader->buffer) { 2770848b8605Smrg r600_bytecode_clear(&bc); 2771848b8605Smrg FREE(shader); 2772848b8605Smrg return NULL; 2773848b8605Smrg } 2774848b8605Smrg 2775b8e80941Smrg bytecode = r600_buffer_map_sync_with_rings 2776b8e80941Smrg (&rctx->b, shader->buffer, 2777b8e80941Smrg PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY); 2778848b8605Smrg bytecode += shader->offset / 4; 2779848b8605Smrg 2780848b8605Smrg if (R600_BIG_ENDIAN) { 2781848b8605Smrg for (i = 0; i < fs_size / 4; ++i) { 2782848b8605Smrg bytecode[i] = util_cpu_to_le32(bc.bytecode[i]); 2783848b8605Smrg } 2784848b8605Smrg } else { 2785848b8605Smrg memcpy(bytecode, bc.bytecode, fs_size); 2786848b8605Smrg } 2787b8e80941Smrg rctx->b.ws->buffer_unmap(shader->buffer->buf); 2788848b8605Smrg 2789848b8605Smrg r600_bytecode_clear(&bc); 2790848b8605Smrg return shader; 2791848b8605Smrg} 2792848b8605Smrg 2793848b8605Smrgvoid r600_bytecode_alu_read(struct r600_bytecode *bc, 2794848b8605Smrg struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1) 2795848b8605Smrg{ 2796848b8605Smrg /* WORD0 */ 2797848b8605Smrg alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0); 2798848b8605Smrg alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0); 2799848b8605Smrg alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0); 2800848b8605Smrg alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0); 2801848b8605Smrg alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0); 2802848b8605Smrg alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0); 2803848b8605Smrg alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0); 2804848b8605Smrg alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0); 2805848b8605Smrg alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0); 2806848b8605Smrg alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0); 2807848b8605Smrg alu->last = G_SQ_ALU_WORD0_LAST(word0); 2808848b8605Smrg 2809848b8605Smrg /* WORD1 */ 2810848b8605Smrg alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1); 2811848b8605Smrg if (alu->bank_swizzle) 2812848b8605Smrg alu->bank_swizzle_force = alu->bank_swizzle; 2813848b8605Smrg alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1); 2814848b8605Smrg alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1); 2815848b8605Smrg alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1); 2816848b8605Smrg alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1); 2817848b8605Smrg if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/ 2818848b8605Smrg { 2819848b8605Smrg alu->is_op3 = 1; 2820848b8605Smrg alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1); 2821848b8605Smrg alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1); 2822848b8605Smrg alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1); 2823848b8605Smrg alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1); 2824848b8605Smrg alu->op = r600_isa_alu_by_opcode(bc->isa, 2825848b8605Smrg G_SQ_ALU_WORD1_OP3_ALU_INST(word1), /* is_op3 = */ 1); 2826848b8605Smrg 2827848b8605Smrg } 2828848b8605Smrg else /*ALU_DWORD1_OP2*/ 2829848b8605Smrg { 2830848b8605Smrg alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1); 2831848b8605Smrg alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1); 2832848b8605Smrg alu->op = r600_isa_alu_by_opcode(bc->isa, 2833848b8605Smrg G_SQ_ALU_WORD1_OP2_ALU_INST(word1), /* is_op3 = */ 0); 2834848b8605Smrg alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1); 2835848b8605Smrg alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1); 2836848b8605Smrg alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1); 2837848b8605Smrg alu->execute_mask = 2838848b8605Smrg G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1); 2839848b8605Smrg } 2840848b8605Smrg} 2841848b8605Smrg 2842848b8605Smrg#if 0 2843848b8605Smrgvoid r600_bytecode_export_read(struct r600_bytecode *bc, 2844848b8605Smrg struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) 2845848b8605Smrg{ 2846848b8605Smrg output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); 2847848b8605Smrg output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0); 2848848b8605Smrg output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0); 2849848b8605Smrg output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0); 2850848b8605Smrg 2851848b8605Smrg output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); 2852848b8605Smrg output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1); 2853848b8605Smrg output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1); 2854848b8605Smrg output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1); 2855848b8605Smrg output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); 2856848b8605Smrg output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); 2857848b8605Smrg output->op = r600_isa_cf_by_opcode(bc->isa, 2858848b8605Smrg G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), 0); 2859848b8605Smrg output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); 2860848b8605Smrg output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1); 2861848b8605Smrg output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1); 2862848b8605Smrg} 2863848b8605Smrg#endif 2864