13464ebd5Sriastradh/* 23464ebd5Sriastradh * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 33464ebd5Sriastradh * 43464ebd5Sriastradh * Permission is hereby granted, free of charge, to any person obtaining a 53464ebd5Sriastradh * copy of this software and associated documentation files (the "Software"), 63464ebd5Sriastradh * to deal in the Software without restriction, including without limitation 73464ebd5Sriastradh * on the rights to use, copy, modify, merge, publish, distribute, sub 83464ebd5Sriastradh * license, and/or sell copies of the Software, and to permit persons to whom 93464ebd5Sriastradh * the Software is furnished to do so, subject to the following conditions: 103464ebd5Sriastradh * 113464ebd5Sriastradh * The above copyright notice and this permission notice (including the next 123464ebd5Sriastradh * paragraph) shall be included in all copies or substantial portions of the 133464ebd5Sriastradh * Software. 143464ebd5Sriastradh * 153464ebd5Sriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 163464ebd5Sriastradh * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 173464ebd5Sriastradh * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 183464ebd5Sriastradh * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 193464ebd5Sriastradh * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 203464ebd5Sriastradh * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 213464ebd5Sriastradh * USE OR OTHER DEALINGS IN THE SOFTWARE. 223464ebd5Sriastradh */ 233464ebd5Sriastradh#include "r600_sq.h" 243464ebd5Sriastradh#include "r600_opcodes.h" 253464ebd5Sriastradh#include "r600_formats.h" 26af69d88dSmrg#include "r600_shader.h" 273464ebd5Sriastradh#include "r600d.h" 283464ebd5Sriastradh 29af69d88dSmrg#include <errno.h> 3001e04c3fSmrg#include "util/u_bitcast.h" 31af69d88dSmrg#include "util/u_dump.h" 32af69d88dSmrg#include "util/u_memory.h" 33af69d88dSmrg#include "util/u_math.h" 34af69d88dSmrg#include "pipe/p_shader_tokens.h" 35af69d88dSmrg 36af69d88dSmrg#include "sb/sb_public.h" 37af69d88dSmrg 383464ebd5Sriastradh#define NUM_OF_CYCLES 3 393464ebd5Sriastradh#define NUM_OF_COMPONENTS 4 403464ebd5Sriastradh 4101e04c3fSmrgstatic inline bool alu_writes(struct r600_bytecode_alu *alu) 423464ebd5Sriastradh{ 4301e04c3fSmrg return alu->dst.write || alu->is_op3; 443464ebd5Sriastradh} 453464ebd5Sriastradh 4601e04c3fSmrgstatic inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu) 4701e04c3fSmrg{ 4801e04c3fSmrg return r600_isa_alu(alu->op)->src_count; 4901e04c3fSmrg} 503464ebd5Sriastradh 51af69d88dSmrgstatic struct r600_bytecode_cf *r600_bytecode_cf(void) 523464ebd5Sriastradh{ 53af69d88dSmrg struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf); 543464ebd5Sriastradh 5501e04c3fSmrg if (!cf) 563464ebd5Sriastradh return NULL; 577ec681f3Smrg list_inithead(&cf->list); 587ec681f3Smrg list_inithead(&cf->alu); 597ec681f3Smrg list_inithead(&cf->vtx); 607ec681f3Smrg list_inithead(&cf->tex); 617ec681f3Smrg list_inithead(&cf->gds); 623464ebd5Sriastradh return cf; 633464ebd5Sriastradh} 643464ebd5Sriastradh 65af69d88dSmrgstatic struct r600_bytecode_alu *r600_bytecode_alu(void) 663464ebd5Sriastradh{ 67af69d88dSmrg struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu); 683464ebd5Sriastradh 6901e04c3fSmrg if (!alu) 703464ebd5Sriastradh return NULL; 717ec681f3Smrg list_inithead(&alu->list); 723464ebd5Sriastradh return alu; 733464ebd5Sriastradh} 743464ebd5Sriastradh 75af69d88dSmrgstatic struct r600_bytecode_vtx *r600_bytecode_vtx(void) 763464ebd5Sriastradh{ 77af69d88dSmrg struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx); 783464ebd5Sriastradh 7901e04c3fSmrg if (!vtx) 803464ebd5Sriastradh return NULL; 817ec681f3Smrg list_inithead(&vtx->list); 823464ebd5Sriastradh return vtx; 833464ebd5Sriastradh} 843464ebd5Sriastradh 85af69d88dSmrgstatic struct r600_bytecode_tex *r600_bytecode_tex(void) 863464ebd5Sriastradh{ 87af69d88dSmrg struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex); 883464ebd5Sriastradh 8901e04c3fSmrg if (!tex) 903464ebd5Sriastradh return NULL; 917ec681f3Smrg list_inithead(&tex->list); 923464ebd5Sriastradh return tex; 933464ebd5Sriastradh} 943464ebd5Sriastradh 9501e04c3fSmrgstatic struct r600_bytecode_gds *r600_bytecode_gds(void) 9601e04c3fSmrg{ 9701e04c3fSmrg struct r600_bytecode_gds *gds = CALLOC_STRUCT(r600_bytecode_gds); 9801e04c3fSmrg 9901e04c3fSmrg if (gds == NULL) 10001e04c3fSmrg return NULL; 1017ec681f3Smrg list_inithead(&gds->list); 10201e04c3fSmrg return gds; 10301e04c3fSmrg} 10401e04c3fSmrg 105af69d88dSmrgstatic unsigned stack_entry_size(enum radeon_family chip) { 106af69d88dSmrg /* Wavefront size: 107af69d88dSmrg * 64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/ 108af69d88dSmrg * Aruba/Sumo/Sumo2/redwood/juniper 109af69d88dSmrg * 32: R630/R730/R710/Palm/Cedar 110af69d88dSmrg * 16: R610/Rs780 111af69d88dSmrg * 112af69d88dSmrg * Stack row size: 113af69d88dSmrg * Wavefront Size 16 32 48 64 114af69d88dSmrg * Columns per Row (R6xx/R7xx/R8xx only) 8 8 4 4 115af69d88dSmrg * Columns per Row (R9xx+) 8 4 4 4 */ 116af69d88dSmrg 117af69d88dSmrg switch (chip) { 118af69d88dSmrg /* FIXME: are some chips missing here? */ 119af69d88dSmrg /* wavefront size 16 */ 1203464ebd5Sriastradh case CHIP_RV610: 1213464ebd5Sriastradh case CHIP_RS780: 122af69d88dSmrg case CHIP_RV620: 1233464ebd5Sriastradh case CHIP_RS880: 124af69d88dSmrg /* wavefront size 32 */ 125af69d88dSmrg case CHIP_RV630: 126af69d88dSmrg case CHIP_RV635: 1273464ebd5Sriastradh case CHIP_RV730: 1283464ebd5Sriastradh case CHIP_RV710: 1293464ebd5Sriastradh case CHIP_PALM: 130af69d88dSmrg case CHIP_CEDAR: 131af69d88dSmrg return 8; 132af69d88dSmrg 133af69d88dSmrg /* wavefront size 64 */ 1343464ebd5Sriastradh default: 135af69d88dSmrg return 4; 1363464ebd5Sriastradh } 1373464ebd5Sriastradh} 1383464ebd5Sriastradh 139af69d88dSmrgvoid r600_bytecode_init(struct r600_bytecode *bc, 140af69d88dSmrg enum chip_class chip_class, 141af69d88dSmrg enum radeon_family family, 142af69d88dSmrg bool has_compressed_msaa_texturing) 143af69d88dSmrg{ 144af69d88dSmrg static unsigned next_shader_id = 0; 145af69d88dSmrg 146af69d88dSmrg bc->debug_id = ++next_shader_id; 147af69d88dSmrg 148af69d88dSmrg if ((chip_class == R600) && 149af69d88dSmrg (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) { 150af69d88dSmrg bc->ar_handling = AR_HANDLE_RV6XX; 151af69d88dSmrg bc->r6xx_nop_after_rel_dst = 1; 152af69d88dSmrg } else { 153af69d88dSmrg bc->ar_handling = AR_HANDLE_NORMAL; 154af69d88dSmrg bc->r6xx_nop_after_rel_dst = 0; 155af69d88dSmrg } 156af69d88dSmrg 1577ec681f3Smrg list_inithead(&bc->cf); 158af69d88dSmrg bc->chip_class = chip_class; 159af69d88dSmrg bc->family = family; 160af69d88dSmrg bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing; 161af69d88dSmrg bc->stack.entry_size = stack_entry_size(family); 162af69d88dSmrg} 163af69d88dSmrg 164af69d88dSmrgint r600_bytecode_add_cf(struct r600_bytecode *bc) 1653464ebd5Sriastradh{ 166af69d88dSmrg struct r600_bytecode_cf *cf = r600_bytecode_cf(); 1673464ebd5Sriastradh 16801e04c3fSmrg if (!cf) 1693464ebd5Sriastradh return -ENOMEM; 1707ec681f3Smrg list_addtail(&cf->list, &bc->cf); 171af69d88dSmrg if (bc->cf_last) { 1723464ebd5Sriastradh cf->id = bc->cf_last->id + 2; 173af69d88dSmrg if (bc->cf_last->eg_alu_extended) { 174af69d88dSmrg /* take into account extended alu size */ 175af69d88dSmrg cf->id += 2; 176af69d88dSmrg bc->ndw += 2; 177af69d88dSmrg } 178af69d88dSmrg } 1793464ebd5Sriastradh bc->cf_last = cf; 1803464ebd5Sriastradh bc->ncf++; 1813464ebd5Sriastradh bc->ndw += 2; 1823464ebd5Sriastradh bc->force_add_cf = 0; 183af69d88dSmrg bc->ar_loaded = 0; 1843464ebd5Sriastradh return 0; 1853464ebd5Sriastradh} 1863464ebd5Sriastradh 187af69d88dSmrgint r600_bytecode_add_output(struct r600_bytecode *bc, 188af69d88dSmrg const struct r600_bytecode_output *output) 1893464ebd5Sriastradh{ 1903464ebd5Sriastradh int r; 1913464ebd5Sriastradh 192af69d88dSmrg if (output->gpr >= bc->ngpr) 193af69d88dSmrg bc->ngpr = output->gpr + 1; 194af69d88dSmrg 195af69d88dSmrg if (bc->cf_last && (bc->cf_last->op == output->op || 196af69d88dSmrg (bc->cf_last->op == CF_OP_EXPORT && 197af69d88dSmrg output->op == CF_OP_EXPORT_DONE)) && 1983464ebd5Sriastradh output->type == bc->cf_last->output.type && 1993464ebd5Sriastradh output->elem_size == bc->cf_last->output.elem_size && 2003464ebd5Sriastradh output->swizzle_x == bc->cf_last->output.swizzle_x && 2013464ebd5Sriastradh output->swizzle_y == bc->cf_last->output.swizzle_y && 2023464ebd5Sriastradh output->swizzle_z == bc->cf_last->output.swizzle_z && 2033464ebd5Sriastradh output->swizzle_w == bc->cf_last->output.swizzle_w && 204af69d88dSmrg output->comp_mask == bc->cf_last->output.comp_mask && 2053464ebd5Sriastradh (output->burst_count + bc->cf_last->output.burst_count) <= 16) { 2063464ebd5Sriastradh 2073464ebd5Sriastradh if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && 2083464ebd5Sriastradh (output->array_base + output->burst_count) == bc->cf_last->output.array_base) { 2093464ebd5Sriastradh 210af69d88dSmrg bc->cf_last->op = bc->cf_last->output.op = output->op; 2113464ebd5Sriastradh bc->cf_last->output.gpr = output->gpr; 2123464ebd5Sriastradh bc->cf_last->output.array_base = output->array_base; 2133464ebd5Sriastradh bc->cf_last->output.burst_count += output->burst_count; 2143464ebd5Sriastradh return 0; 2153464ebd5Sriastradh 2163464ebd5Sriastradh } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) && 2173464ebd5Sriastradh output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) { 2183464ebd5Sriastradh 219af69d88dSmrg bc->cf_last->op = bc->cf_last->output.op = output->op; 2203464ebd5Sriastradh bc->cf_last->output.burst_count += output->burst_count; 2213464ebd5Sriastradh return 0; 2223464ebd5Sriastradh } 2233464ebd5Sriastradh } 2243464ebd5Sriastradh 225af69d88dSmrg r = r600_bytecode_add_cf(bc); 2263464ebd5Sriastradh if (r) 2273464ebd5Sriastradh return r; 228af69d88dSmrg bc->cf_last->op = output->op; 229af69d88dSmrg memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output)); 230af69d88dSmrg bc->cf_last->barrier = 1; 2313464ebd5Sriastradh return 0; 2323464ebd5Sriastradh} 2333464ebd5Sriastradh 23401e04c3fSmrgint r600_bytecode_add_pending_output(struct r600_bytecode *bc, 23501e04c3fSmrg const struct r600_bytecode_output *output) 23601e04c3fSmrg{ 23701e04c3fSmrg assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs)); 23801e04c3fSmrg bc->pending_outputs[bc->n_pending_outputs++] = *output; 23901e04c3fSmrg 24001e04c3fSmrg return 0; 24101e04c3fSmrg} 24201e04c3fSmrg 24301e04c3fSmrgvoid r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean need_wait_ack) 24401e04c3fSmrg{ 24501e04c3fSmrg bc->need_wait_ack = need_wait_ack; 24601e04c3fSmrg} 24701e04c3fSmrg 24801e04c3fSmrgboolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc) 24901e04c3fSmrg{ 25001e04c3fSmrg return bc->need_wait_ack; 25101e04c3fSmrg} 25201e04c3fSmrg 2533464ebd5Sriastradh/* alu instructions that can ony exits once per group */ 25401e04c3fSmrgstatic int is_alu_once_inst(struct r600_bytecode_alu *alu) 2553464ebd5Sriastradh{ 25601e04c3fSmrg return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER; 2573464ebd5Sriastradh} 2583464ebd5Sriastradh 259af69d88dSmrgstatic int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 2603464ebd5Sriastradh{ 261af69d88dSmrg return (r600_isa_alu(alu->op)->flags & AF_REPL) && 262af69d88dSmrg (r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V); 2633464ebd5Sriastradh} 2643464ebd5Sriastradh 26501e04c3fSmrgstatic int is_alu_mova_inst(struct r600_bytecode_alu *alu) 2663464ebd5Sriastradh{ 267af69d88dSmrg return r600_isa_alu(alu->op)->flags & AF_MOVA; 2683464ebd5Sriastradh} 2693464ebd5Sriastradh 27001e04c3fSmrgstatic int alu_uses_rel(struct r600_bytecode_alu *alu) 2713464ebd5Sriastradh{ 27201e04c3fSmrg unsigned num_src = r600_bytecode_get_num_operands(alu); 273af69d88dSmrg unsigned src; 274af69d88dSmrg 275af69d88dSmrg if (alu->dst.rel) { 276af69d88dSmrg return 1; 2773464ebd5Sriastradh } 278af69d88dSmrg 279af69d88dSmrg for (src = 0; src < num_src; ++src) { 280af69d88dSmrg if (alu->src[src].rel) { 281af69d88dSmrg return 1; 282af69d88dSmrg } 283af69d88dSmrg } 284af69d88dSmrg return 0; 2853464ebd5Sriastradh} 2863464ebd5Sriastradh 28701e04c3fSmrgstatic int is_lds_read(int sel) 28801e04c3fSmrg{ 28901e04c3fSmrg return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP; 29001e04c3fSmrg} 29101e04c3fSmrg 29201e04c3fSmrgstatic int alu_uses_lds(struct r600_bytecode_alu *alu) 29301e04c3fSmrg{ 29401e04c3fSmrg unsigned num_src = r600_bytecode_get_num_operands(alu); 29501e04c3fSmrg unsigned src; 29601e04c3fSmrg 29701e04c3fSmrg for (src = 0; src < num_src; ++src) { 29801e04c3fSmrg if (is_lds_read(alu->src[src].sel)) { 29901e04c3fSmrg return 1; 30001e04c3fSmrg } 30101e04c3fSmrg } 30201e04c3fSmrg return 0; 30301e04c3fSmrg} 30401e04c3fSmrg 30501e04c3fSmrgstatic int is_alu_64bit_inst(struct r600_bytecode_alu *alu) 30601e04c3fSmrg{ 30701e04c3fSmrg const struct alu_op_info *op = r600_isa_alu(alu->op); 30801e04c3fSmrg return (op->flags & AF_64); 30901e04c3fSmrg} 31001e04c3fSmrg 311af69d88dSmrgstatic int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 3123464ebd5Sriastradh{ 313af69d88dSmrg unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op); 314af69d88dSmrg return !(slots & AF_S); 3153464ebd5Sriastradh} 3163464ebd5Sriastradh 317af69d88dSmrgstatic int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 3183464ebd5Sriastradh{ 319af69d88dSmrg unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op); 320af69d88dSmrg return !(slots & AF_V); 3213464ebd5Sriastradh} 3223464ebd5Sriastradh 3233464ebd5Sriastradh/* alu instructions that can execute on any unit */ 324af69d88dSmrgstatic int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 3253464ebd5Sriastradh{ 326af69d88dSmrg unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op); 327af69d88dSmrg return slots == AF_VS; 3283464ebd5Sriastradh} 3293464ebd5Sriastradh 33001e04c3fSmrgstatic int is_nop_inst(struct r600_bytecode_alu *alu) 331af69d88dSmrg{ 332af69d88dSmrg return alu->op == ALU_OP0_NOP; 33301e04c3fSmrg} 334af69d88dSmrg 335af69d88dSmrgstatic int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first, 336af69d88dSmrg struct r600_bytecode_alu *assignment[5]) 3373464ebd5Sriastradh{ 338af69d88dSmrg struct r600_bytecode_alu *alu; 3393464ebd5Sriastradh unsigned i, chan, trans; 340af69d88dSmrg int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 3413464ebd5Sriastradh 3423464ebd5Sriastradh for (i = 0; i < max_slots; i++) 3433464ebd5Sriastradh assignment[i] = NULL; 3443464ebd5Sriastradh 345af69d88dSmrg for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) { 3463464ebd5Sriastradh chan = alu->dst.chan; 3473464ebd5Sriastradh if (max_slots == 4) 3483464ebd5Sriastradh trans = 0; 3493464ebd5Sriastradh else if (is_alu_trans_unit_inst(bc, alu)) 3503464ebd5Sriastradh trans = 1; 3513464ebd5Sriastradh else if (is_alu_vec_unit_inst(bc, alu)) 3523464ebd5Sriastradh trans = 0; 3533464ebd5Sriastradh else if (assignment[chan]) 3543464ebd5Sriastradh trans = 1; /* Assume ALU_INST_PREFER_VECTOR. */ 3553464ebd5Sriastradh else 3563464ebd5Sriastradh trans = 0; 3573464ebd5Sriastradh 3583464ebd5Sriastradh if (trans) { 3593464ebd5Sriastradh if (assignment[4]) { 3603464ebd5Sriastradh assert(0); /* ALU.Trans has already been allocated. */ 3613464ebd5Sriastradh return -1; 3623464ebd5Sriastradh } 3633464ebd5Sriastradh assignment[4] = alu; 3643464ebd5Sriastradh } else { 3657ec681f3Smrg if (assignment[chan]) { 3663464ebd5Sriastradh assert(0); /* ALU.chan has already been allocated. */ 3673464ebd5Sriastradh return -1; 3683464ebd5Sriastradh } 3693464ebd5Sriastradh assignment[chan] = alu; 3703464ebd5Sriastradh } 3713464ebd5Sriastradh 3723464ebd5Sriastradh if (alu->last) 3733464ebd5Sriastradh break; 3743464ebd5Sriastradh } 3753464ebd5Sriastradh return 0; 3763464ebd5Sriastradh} 3773464ebd5Sriastradh 3783464ebd5Sriastradhstruct alu_bank_swizzle { 3793464ebd5Sriastradh int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; 3803464ebd5Sriastradh int hw_cfile_addr[4]; 3813464ebd5Sriastradh int hw_cfile_elem[4]; 3823464ebd5Sriastradh}; 3833464ebd5Sriastradh 3843464ebd5Sriastradhstatic const unsigned cycle_for_bank_swizzle_vec[][3] = { 3853464ebd5Sriastradh [SQ_ALU_VEC_012] = { 0, 1, 2 }, 3863464ebd5Sriastradh [SQ_ALU_VEC_021] = { 0, 2, 1 }, 3873464ebd5Sriastradh [SQ_ALU_VEC_120] = { 1, 2, 0 }, 3883464ebd5Sriastradh [SQ_ALU_VEC_102] = { 1, 0, 2 }, 3893464ebd5Sriastradh [SQ_ALU_VEC_201] = { 2, 0, 1 }, 3903464ebd5Sriastradh [SQ_ALU_VEC_210] = { 2, 1, 0 } 3913464ebd5Sriastradh}; 3923464ebd5Sriastradh 3933464ebd5Sriastradhstatic const unsigned cycle_for_bank_swizzle_scl[][3] = { 3943464ebd5Sriastradh [SQ_ALU_SCL_210] = { 2, 1, 0 }, 3953464ebd5Sriastradh [SQ_ALU_SCL_122] = { 1, 2, 2 }, 3963464ebd5Sriastradh [SQ_ALU_SCL_212] = { 2, 1, 2 }, 3973464ebd5Sriastradh [SQ_ALU_SCL_221] = { 2, 2, 1 } 3983464ebd5Sriastradh}; 3993464ebd5Sriastradh 4003464ebd5Sriastradhstatic void init_bank_swizzle(struct alu_bank_swizzle *bs) 4013464ebd5Sriastradh{ 4023464ebd5Sriastradh int i, cycle, component; 4033464ebd5Sriastradh /* set up gpr use */ 4043464ebd5Sriastradh for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++) 4053464ebd5Sriastradh for (component = 0; component < NUM_OF_COMPONENTS; component++) 4063464ebd5Sriastradh bs->hw_gpr[cycle][component] = -1; 4073464ebd5Sriastradh for (i = 0; i < 4; i++) 4083464ebd5Sriastradh bs->hw_cfile_addr[i] = -1; 4093464ebd5Sriastradh for (i = 0; i < 4; i++) 4103464ebd5Sriastradh bs->hw_cfile_elem[i] = -1; 4113464ebd5Sriastradh} 4123464ebd5Sriastradh 4133464ebd5Sriastradhstatic int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle) 4143464ebd5Sriastradh{ 4153464ebd5Sriastradh if (bs->hw_gpr[cycle][chan] == -1) 4163464ebd5Sriastradh bs->hw_gpr[cycle][chan] = sel; 4173464ebd5Sriastradh else if (bs->hw_gpr[cycle][chan] != (int)sel) { 4183464ebd5Sriastradh /* Another scalar operation has already used the GPR read port for the channel. */ 4193464ebd5Sriastradh return -1; 4203464ebd5Sriastradh } 4213464ebd5Sriastradh return 0; 4223464ebd5Sriastradh} 4233464ebd5Sriastradh 42401e04c3fSmrgstatic int reserve_cfile(const struct r600_bytecode *bc, 42501e04c3fSmrg struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) 4263464ebd5Sriastradh{ 4273464ebd5Sriastradh int res, num_res = 4; 428af69d88dSmrg if (bc->chip_class >= R700) { 4293464ebd5Sriastradh num_res = 2; 4303464ebd5Sriastradh chan /= 2; 4313464ebd5Sriastradh } 4323464ebd5Sriastradh for (res = 0; res < num_res; ++res) { 4333464ebd5Sriastradh if (bs->hw_cfile_addr[res] == -1) { 4343464ebd5Sriastradh bs->hw_cfile_addr[res] = sel; 4353464ebd5Sriastradh bs->hw_cfile_elem[res] = chan; 4363464ebd5Sriastradh return 0; 4373464ebd5Sriastradh } else if (bs->hw_cfile_addr[res] == sel && 4383464ebd5Sriastradh bs->hw_cfile_elem[res] == chan) 4393464ebd5Sriastradh return 0; /* Read for this scalar element already reserved, nothing to do here. */ 4403464ebd5Sriastradh } 4413464ebd5Sriastradh /* All cfile read ports are used, cannot reference vector element. */ 4423464ebd5Sriastradh return -1; 4433464ebd5Sriastradh} 4443464ebd5Sriastradh 4453464ebd5Sriastradhstatic int is_gpr(unsigned sel) 4463464ebd5Sriastradh{ 447af69d88dSmrg return (sel <= 127); 4483464ebd5Sriastradh} 4493464ebd5Sriastradh 4503464ebd5Sriastradh/* CB constants start at 512, and get translated to a kcache index when ALU 4513464ebd5Sriastradh * clauses are constructed. Note that we handle kcache constants the same way 4523464ebd5Sriastradh * as (the now gone) cfile constants, is that really required? */ 4533464ebd5Sriastradhstatic int is_cfile(unsigned sel) 4543464ebd5Sriastradh{ 4553464ebd5Sriastradh return (sel > 255 && sel < 512) || 4563464ebd5Sriastradh (sel > 511 && sel < 4607) || /* Kcache before translation. */ 4573464ebd5Sriastradh (sel > 127 && sel < 192); /* Kcache after translation. */ 4583464ebd5Sriastradh} 4593464ebd5Sriastradh 4603464ebd5Sriastradhstatic int is_const(int sel) 4613464ebd5Sriastradh{ 4623464ebd5Sriastradh return is_cfile(sel) || 4633464ebd5Sriastradh (sel >= V_SQ_ALU_SRC_0 && 4643464ebd5Sriastradh sel <= V_SQ_ALU_SRC_LITERAL); 4653464ebd5Sriastradh} 4663464ebd5Sriastradh 46701e04c3fSmrgstatic int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, 4683464ebd5Sriastradh struct alu_bank_swizzle *bs, int bank_swizzle) 4693464ebd5Sriastradh{ 4703464ebd5Sriastradh int r, src, num_src, sel, elem, cycle; 4713464ebd5Sriastradh 47201e04c3fSmrg num_src = r600_bytecode_get_num_operands(alu); 4733464ebd5Sriastradh for (src = 0; src < num_src; src++) { 4743464ebd5Sriastradh sel = alu->src[src].sel; 4753464ebd5Sriastradh elem = alu->src[src].chan; 4763464ebd5Sriastradh if (is_gpr(sel)) { 4773464ebd5Sriastradh cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src]; 4783464ebd5Sriastradh if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan) 4793464ebd5Sriastradh /* Nothing to do; special-case optimization, 4803464ebd5Sriastradh * second source uses first source’s reservation. */ 4813464ebd5Sriastradh continue; 4823464ebd5Sriastradh else { 4833464ebd5Sriastradh r = reserve_gpr(bs, sel, elem, cycle); 4843464ebd5Sriastradh if (r) 4853464ebd5Sriastradh return r; 4863464ebd5Sriastradh } 4873464ebd5Sriastradh } else if (is_cfile(sel)) { 488af69d88dSmrg r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem); 4893464ebd5Sriastradh if (r) 4903464ebd5Sriastradh return r; 4913464ebd5Sriastradh } 4923464ebd5Sriastradh /* No restrictions on PV, PS, literal or special constants. */ 4933464ebd5Sriastradh } 4943464ebd5Sriastradh return 0; 4953464ebd5Sriastradh} 4963464ebd5Sriastradh 49701e04c3fSmrgstatic int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, 4983464ebd5Sriastradh struct alu_bank_swizzle *bs, int bank_swizzle) 4993464ebd5Sriastradh{ 5003464ebd5Sriastradh int r, src, num_src, const_count, sel, elem, cycle; 5013464ebd5Sriastradh 50201e04c3fSmrg num_src = r600_bytecode_get_num_operands(alu); 5033464ebd5Sriastradh for (const_count = 0, src = 0; src < num_src; ++src) { 5043464ebd5Sriastradh sel = alu->src[src].sel; 5053464ebd5Sriastradh elem = alu->src[src].chan; 5063464ebd5Sriastradh if (is_const(sel)) { /* Any constant, including literal and inline constants. */ 5073464ebd5Sriastradh if (const_count >= 2) 5083464ebd5Sriastradh /* More than two references to a constant in 5093464ebd5Sriastradh * transcendental operation. */ 5103464ebd5Sriastradh return -1; 5113464ebd5Sriastradh else 5123464ebd5Sriastradh const_count++; 5133464ebd5Sriastradh } 5143464ebd5Sriastradh if (is_cfile(sel)) { 515af69d88dSmrg r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem); 5163464ebd5Sriastradh if (r) 5173464ebd5Sriastradh return r; 5183464ebd5Sriastradh } 5193464ebd5Sriastradh } 5203464ebd5Sriastradh for (src = 0; src < num_src; ++src) { 5213464ebd5Sriastradh sel = alu->src[src].sel; 5223464ebd5Sriastradh elem = alu->src[src].chan; 5233464ebd5Sriastradh if (is_gpr(sel)) { 5243464ebd5Sriastradh cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; 5253464ebd5Sriastradh if (cycle < const_count) 5263464ebd5Sriastradh /* Cycle for GPR load conflicts with 5273464ebd5Sriastradh * constant load in transcendental operation. */ 5283464ebd5Sriastradh return -1; 5293464ebd5Sriastradh r = reserve_gpr(bs, sel, elem, cycle); 5303464ebd5Sriastradh if (r) 5313464ebd5Sriastradh return r; 5323464ebd5Sriastradh } 5333464ebd5Sriastradh /* PV PS restrictions */ 5343464ebd5Sriastradh if (const_count && (sel == 254 || sel == 255)) { 5353464ebd5Sriastradh cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; 5363464ebd5Sriastradh if (cycle < const_count) 5373464ebd5Sriastradh return -1; 5383464ebd5Sriastradh } 5393464ebd5Sriastradh } 5403464ebd5Sriastradh return 0; 5413464ebd5Sriastradh} 5423464ebd5Sriastradh 54301e04c3fSmrgstatic int check_and_set_bank_swizzle(const struct r600_bytecode *bc, 544af69d88dSmrg struct r600_bytecode_alu *slots[5]) 5453464ebd5Sriastradh{ 5463464ebd5Sriastradh struct alu_bank_swizzle bs; 5473464ebd5Sriastradh int bank_swizzle[5]; 5483464ebd5Sriastradh int i, r = 0, forced = 1; 549af69d88dSmrg boolean scalar_only = bc->chip_class == CAYMAN ? false : true; 550af69d88dSmrg int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 5513464ebd5Sriastradh 5523464ebd5Sriastradh for (i = 0; i < max_slots; i++) { 5533464ebd5Sriastradh if (slots[i]) { 5543464ebd5Sriastradh if (slots[i]->bank_swizzle_force) { 5553464ebd5Sriastradh slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; 5563464ebd5Sriastradh } else { 5573464ebd5Sriastradh forced = 0; 5583464ebd5Sriastradh } 5593464ebd5Sriastradh } 5603464ebd5Sriastradh 5613464ebd5Sriastradh if (i < 4 && slots[i]) 5623464ebd5Sriastradh scalar_only = false; 5633464ebd5Sriastradh } 5643464ebd5Sriastradh if (forced) 5653464ebd5Sriastradh return 0; 5663464ebd5Sriastradh 5673464ebd5Sriastradh /* Just check every possible combination of bank swizzle. 5683464ebd5Sriastradh * Not very efficent, but works on the first try in most of the cases. */ 5693464ebd5Sriastradh for (i = 0; i < 4; i++) 5703464ebd5Sriastradh if (!slots[i] || !slots[i]->bank_swizzle_force) 5713464ebd5Sriastradh bank_swizzle[i] = SQ_ALU_VEC_012; 5723464ebd5Sriastradh else 5733464ebd5Sriastradh bank_swizzle[i] = slots[i]->bank_swizzle; 5743464ebd5Sriastradh 5753464ebd5Sriastradh bank_swizzle[4] = SQ_ALU_SCL_210; 5763464ebd5Sriastradh while(bank_swizzle[4] <= SQ_ALU_SCL_221) { 5773464ebd5Sriastradh 5783464ebd5Sriastradh init_bank_swizzle(&bs); 5793464ebd5Sriastradh if (scalar_only == false) { 5803464ebd5Sriastradh for (i = 0; i < 4; i++) { 5813464ebd5Sriastradh if (slots[i]) { 5823464ebd5Sriastradh r = check_vector(bc, slots[i], &bs, bank_swizzle[i]); 5833464ebd5Sriastradh if (r) 5843464ebd5Sriastradh break; 5853464ebd5Sriastradh } 5863464ebd5Sriastradh } 5873464ebd5Sriastradh } else 5883464ebd5Sriastradh r = 0; 5893464ebd5Sriastradh 590af69d88dSmrg if (!r && max_slots == 5 && slots[4]) { 5913464ebd5Sriastradh r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]); 5923464ebd5Sriastradh } 5933464ebd5Sriastradh if (!r) { 5943464ebd5Sriastradh for (i = 0; i < max_slots; i++) { 5953464ebd5Sriastradh if (slots[i]) 5963464ebd5Sriastradh slots[i]->bank_swizzle = bank_swizzle[i]; 5973464ebd5Sriastradh } 5983464ebd5Sriastradh return 0; 5993464ebd5Sriastradh } 6003464ebd5Sriastradh 6013464ebd5Sriastradh if (scalar_only) { 6023464ebd5Sriastradh bank_swizzle[4]++; 6033464ebd5Sriastradh } else { 6043464ebd5Sriastradh for (i = 0; i < max_slots; i++) { 6053464ebd5Sriastradh if (!slots[i] || !slots[i]->bank_swizzle_force) { 6063464ebd5Sriastradh bank_swizzle[i]++; 6073464ebd5Sriastradh if (bank_swizzle[i] <= SQ_ALU_VEC_210) 6083464ebd5Sriastradh break; 609af69d88dSmrg else if (i < max_slots - 1) 6103464ebd5Sriastradh bank_swizzle[i] = SQ_ALU_VEC_012; 611af69d88dSmrg else 612af69d88dSmrg return -1; 6133464ebd5Sriastradh } 6143464ebd5Sriastradh } 6153464ebd5Sriastradh } 6163464ebd5Sriastradh } 6173464ebd5Sriastradh 6183464ebd5Sriastradh /* Couldn't find a working swizzle. */ 6193464ebd5Sriastradh return -1; 6203464ebd5Sriastradh} 6213464ebd5Sriastradh 622af69d88dSmrgstatic int replace_gpr_with_pv_ps(struct r600_bytecode *bc, 623af69d88dSmrg struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev) 6243464ebd5Sriastradh{ 625af69d88dSmrg struct r600_bytecode_alu *prev[5]; 6263464ebd5Sriastradh int gpr[5], chan[5]; 6273464ebd5Sriastradh int i, j, r, src, num_src; 628af69d88dSmrg int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 6293464ebd5Sriastradh 6303464ebd5Sriastradh r = assign_alu_units(bc, alu_prev, prev); 6313464ebd5Sriastradh if (r) 6323464ebd5Sriastradh return r; 6333464ebd5Sriastradh 6343464ebd5Sriastradh for (i = 0; i < max_slots; ++i) { 63501e04c3fSmrg if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) { 63601e04c3fSmrg 63701e04c3fSmrg if (is_alu_64bit_inst(prev[i])) { 63801e04c3fSmrg gpr[i] = -1; 63901e04c3fSmrg continue; 64001e04c3fSmrg } 64101e04c3fSmrg 6423464ebd5Sriastradh gpr[i] = prev[i]->dst.sel; 6433464ebd5Sriastradh /* cube writes more than PV.X */ 644af69d88dSmrg if (is_alu_reduction_inst(bc, prev[i])) 6453464ebd5Sriastradh chan[i] = 0; 6463464ebd5Sriastradh else 6473464ebd5Sriastradh chan[i] = prev[i]->dst.chan; 6483464ebd5Sriastradh } else 6493464ebd5Sriastradh gpr[i] = -1; 6503464ebd5Sriastradh } 6513464ebd5Sriastradh 6523464ebd5Sriastradh for (i = 0; i < max_slots; ++i) { 653af69d88dSmrg struct r600_bytecode_alu *alu = slots[i]; 65401e04c3fSmrg if (!alu) 6553464ebd5Sriastradh continue; 6563464ebd5Sriastradh 65701e04c3fSmrg if (is_alu_64bit_inst(alu)) 65801e04c3fSmrg continue; 65901e04c3fSmrg num_src = r600_bytecode_get_num_operands(alu); 6603464ebd5Sriastradh for (src = 0; src < num_src; ++src) { 6613464ebd5Sriastradh if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) 6623464ebd5Sriastradh continue; 6633464ebd5Sriastradh 664af69d88dSmrg if (bc->chip_class < CAYMAN) { 6653464ebd5Sriastradh if (alu->src[src].sel == gpr[4] && 666af69d88dSmrg alu->src[src].chan == chan[4] && 667af69d88dSmrg alu_prev->pred_sel == alu->pred_sel) { 6683464ebd5Sriastradh alu->src[src].sel = V_SQ_ALU_SRC_PS; 6693464ebd5Sriastradh alu->src[src].chan = 0; 6703464ebd5Sriastradh continue; 6713464ebd5Sriastradh } 6723464ebd5Sriastradh } 6733464ebd5Sriastradh 6743464ebd5Sriastradh for (j = 0; j < 4; ++j) { 6753464ebd5Sriastradh if (alu->src[src].sel == gpr[j] && 676af69d88dSmrg alu->src[src].chan == j && 677af69d88dSmrg alu_prev->pred_sel == alu->pred_sel) { 6783464ebd5Sriastradh alu->src[src].sel = V_SQ_ALU_SRC_PV; 6793464ebd5Sriastradh alu->src[src].chan = chan[j]; 6803464ebd5Sriastradh break; 6813464ebd5Sriastradh } 6823464ebd5Sriastradh } 6833464ebd5Sriastradh } 6843464ebd5Sriastradh } 6853464ebd5Sriastradh 6863464ebd5Sriastradh return 0; 6873464ebd5Sriastradh} 6883464ebd5Sriastradh 6897ec681f3Smrgvoid r600_bytecode_special_constants(uint32_t value, unsigned *sel) 6903464ebd5Sriastradh{ 6913464ebd5Sriastradh switch(value) { 6923464ebd5Sriastradh case 0: 6933464ebd5Sriastradh *sel = V_SQ_ALU_SRC_0; 6943464ebd5Sriastradh break; 6953464ebd5Sriastradh case 1: 6963464ebd5Sriastradh *sel = V_SQ_ALU_SRC_1_INT; 6973464ebd5Sriastradh break; 6983464ebd5Sriastradh case -1: 6993464ebd5Sriastradh *sel = V_SQ_ALU_SRC_M_1_INT; 7003464ebd5Sriastradh break; 7013464ebd5Sriastradh case 0x3F800000: /* 1.0f */ 7023464ebd5Sriastradh *sel = V_SQ_ALU_SRC_1; 7033464ebd5Sriastradh break; 7043464ebd5Sriastradh case 0x3F000000: /* 0.5f */ 7053464ebd5Sriastradh *sel = V_SQ_ALU_SRC_0_5; 7063464ebd5Sriastradh break; 7073464ebd5Sriastradh default: 7083464ebd5Sriastradh *sel = V_SQ_ALU_SRC_LITERAL; 7093464ebd5Sriastradh break; 7103464ebd5Sriastradh } 7113464ebd5Sriastradh} 7123464ebd5Sriastradh 7133464ebd5Sriastradh/* compute how many literal are needed */ 71401e04c3fSmrgstatic int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu, 7153464ebd5Sriastradh uint32_t literal[4], unsigned *nliteral) 7163464ebd5Sriastradh{ 71701e04c3fSmrg unsigned num_src = r600_bytecode_get_num_operands(alu); 7183464ebd5Sriastradh unsigned i, j; 7193464ebd5Sriastradh 7203464ebd5Sriastradh for (i = 0; i < num_src; ++i) { 7213464ebd5Sriastradh if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 7223464ebd5Sriastradh uint32_t value = alu->src[i].value; 7233464ebd5Sriastradh unsigned found = 0; 7243464ebd5Sriastradh for (j = 0; j < *nliteral; ++j) { 7253464ebd5Sriastradh if (literal[j] == value) { 7263464ebd5Sriastradh found = 1; 7273464ebd5Sriastradh break; 7283464ebd5Sriastradh } 7293464ebd5Sriastradh } 7303464ebd5Sriastradh if (!found) { 7313464ebd5Sriastradh if (*nliteral >= 4) 7323464ebd5Sriastradh return -EINVAL; 7333464ebd5Sriastradh literal[(*nliteral)++] = value; 7343464ebd5Sriastradh } 7353464ebd5Sriastradh } 7363464ebd5Sriastradh } 7373464ebd5Sriastradh return 0; 7383464ebd5Sriastradh} 7393464ebd5Sriastradh 74001e04c3fSmrgstatic void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu, 74101e04c3fSmrg uint32_t literal[4], unsigned nliteral) 7423464ebd5Sriastradh{ 74301e04c3fSmrg unsigned num_src = r600_bytecode_get_num_operands(alu); 7443464ebd5Sriastradh unsigned i, j; 7453464ebd5Sriastradh 7463464ebd5Sriastradh for (i = 0; i < num_src; ++i) { 7473464ebd5Sriastradh if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 7483464ebd5Sriastradh uint32_t value = alu->src[i].value; 7493464ebd5Sriastradh for (j = 0; j < nliteral; ++j) { 7503464ebd5Sriastradh if (literal[j] == value) { 7513464ebd5Sriastradh alu->src[i].chan = j; 7523464ebd5Sriastradh break; 7533464ebd5Sriastradh } 7543464ebd5Sriastradh } 7553464ebd5Sriastradh } 7563464ebd5Sriastradh } 7573464ebd5Sriastradh} 7583464ebd5Sriastradh 759af69d88dSmrgstatic int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5], 760af69d88dSmrg struct r600_bytecode_alu *alu_prev) 7613464ebd5Sriastradh{ 762af69d88dSmrg struct r600_bytecode_alu *prev[5]; 763af69d88dSmrg struct r600_bytecode_alu *result[5] = { NULL }; 7643464ebd5Sriastradh 7657ec681f3Smrg uint8_t interp_xz = 0; 7667ec681f3Smrg 7673464ebd5Sriastradh uint32_t literal[4], prev_literal[4]; 7683464ebd5Sriastradh unsigned nliteral = 0, prev_nliteral = 0; 7693464ebd5Sriastradh 7703464ebd5Sriastradh int i, j, r, src, num_src; 7713464ebd5Sriastradh int num_once_inst = 0; 7723464ebd5Sriastradh int have_mova = 0, have_rel = 0; 773af69d88dSmrg int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 7743464ebd5Sriastradh 7753464ebd5Sriastradh r = assign_alu_units(bc, alu_prev, prev); 7763464ebd5Sriastradh if (r) 7773464ebd5Sriastradh return r; 7783464ebd5Sriastradh 7793464ebd5Sriastradh for (i = 0; i < max_slots; ++i) { 780af69d88dSmrg if (prev[i]) { 781af69d88dSmrg if (prev[i]->pred_sel) 782af69d88dSmrg return 0; 78301e04c3fSmrg if (is_alu_once_inst(prev[i])) 784af69d88dSmrg return 0; 7857ec681f3Smrg 7867ec681f3Smrg if (prev[i]->op == ALU_OP2_INTERP_X) 7877ec681f3Smrg interp_xz |= 1; 7887ec681f3Smrg if (prev[i]->op == ALU_OP2_INTERP_Z) 7897ec681f3Smrg interp_xz |= 2; 790af69d88dSmrg } 791af69d88dSmrg if (slots[i]) { 792af69d88dSmrg if (slots[i]->pred_sel) 793af69d88dSmrg return 0; 79401e04c3fSmrg if (is_alu_once_inst(slots[i])) 795af69d88dSmrg return 0; 7967ec681f3Smrg if (slots[i]->op == ALU_OP2_INTERP_X) 7977ec681f3Smrg interp_xz |= 1; 7987ec681f3Smrg if (slots[i]->op == ALU_OP2_INTERP_Z) 7997ec681f3Smrg interp_xz |= 2; 800af69d88dSmrg } 8017ec681f3Smrg if (interp_xz == 3) 8027ec681f3Smrg return 0; 803af69d88dSmrg } 804af69d88dSmrg 805af69d88dSmrg for (i = 0; i < max_slots; ++i) { 806af69d88dSmrg struct r600_bytecode_alu *alu; 807af69d88dSmrg 808af69d88dSmrg if (num_once_inst > 0) 809af69d88dSmrg return 0; 8103464ebd5Sriastradh 8113464ebd5Sriastradh /* check number of literals */ 8123464ebd5Sriastradh if (prev[i]) { 81301e04c3fSmrg if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral)) 8143464ebd5Sriastradh return 0; 81501e04c3fSmrg if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral)) 8163464ebd5Sriastradh return 0; 81701e04c3fSmrg if (is_alu_mova_inst(prev[i])) { 8183464ebd5Sriastradh if (have_rel) 8193464ebd5Sriastradh return 0; 8203464ebd5Sriastradh have_mova = 1; 8213464ebd5Sriastradh } 822af69d88dSmrg 82301e04c3fSmrg if (alu_uses_rel(prev[i])) { 824af69d88dSmrg if (have_mova) { 825af69d88dSmrg return 0; 826af69d88dSmrg } 827af69d88dSmrg have_rel = 1; 828af69d88dSmrg } 82901e04c3fSmrg if (alu_uses_lds(prev[i])) 83001e04c3fSmrg return 0; 831af69d88dSmrg 83201e04c3fSmrg num_once_inst += is_alu_once_inst(prev[i]); 8333464ebd5Sriastradh } 83401e04c3fSmrg if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral)) 8353464ebd5Sriastradh return 0; 8363464ebd5Sriastradh 8373464ebd5Sriastradh /* Let's check used slots. */ 8383464ebd5Sriastradh if (prev[i] && !slots[i]) { 8393464ebd5Sriastradh result[i] = prev[i]; 8403464ebd5Sriastradh continue; 8413464ebd5Sriastradh } else if (prev[i] && slots[i]) { 8423464ebd5Sriastradh if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { 8433464ebd5Sriastradh /* Trans unit is still free try to use it. */ 84401e04c3fSmrg if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) { 8453464ebd5Sriastradh result[i] = prev[i]; 8463464ebd5Sriastradh result[4] = slots[i]; 8473464ebd5Sriastradh } else if (is_alu_any_unit_inst(bc, prev[i])) { 848af69d88dSmrg if (slots[i]->dst.sel == prev[i]->dst.sel && 84901e04c3fSmrg alu_writes(slots[i]) && 85001e04c3fSmrg alu_writes(prev[i])) 851af69d88dSmrg return 0; 852af69d88dSmrg 8533464ebd5Sriastradh result[i] = slots[i]; 8543464ebd5Sriastradh result[4] = prev[i]; 8553464ebd5Sriastradh } else 8563464ebd5Sriastradh return 0; 8573464ebd5Sriastradh } else 8583464ebd5Sriastradh return 0; 8593464ebd5Sriastradh } else if(!slots[i]) { 8603464ebd5Sriastradh continue; 861af69d88dSmrg } else { 862af69d88dSmrg if (max_slots == 5 && slots[i] && prev[4] && 863af69d88dSmrg slots[i]->dst.sel == prev[4]->dst.sel && 864af69d88dSmrg slots[i]->dst.chan == prev[4]->dst.chan && 86501e04c3fSmrg alu_writes(slots[i]) && 86601e04c3fSmrg alu_writes(prev[4])) 867af69d88dSmrg return 0; 868af69d88dSmrg 8693464ebd5Sriastradh result[i] = slots[i]; 870af69d88dSmrg } 8713464ebd5Sriastradh 8723464ebd5Sriastradh alu = slots[i]; 87301e04c3fSmrg num_once_inst += is_alu_once_inst(alu); 8743464ebd5Sriastradh 875af69d88dSmrg /* don't reschedule NOPs */ 87601e04c3fSmrg if (is_nop_inst(alu)) 877af69d88dSmrg return 0; 878af69d88dSmrg 87901e04c3fSmrg if (is_alu_mova_inst(alu)) { 880af69d88dSmrg if (have_rel) { 8813464ebd5Sriastradh return 0; 882af69d88dSmrg } 883af69d88dSmrg have_mova = 1; 884af69d88dSmrg } 885af69d88dSmrg 88601e04c3fSmrg if (alu_uses_rel(alu)) { 887af69d88dSmrg if (have_mova) { 888af69d88dSmrg return 0; 889af69d88dSmrg } 8903464ebd5Sriastradh have_rel = 1; 8913464ebd5Sriastradh } 8923464ebd5Sriastradh 89301e04c3fSmrg if (alu->op == ALU_OP0_SET_CF_IDX0 || 89401e04c3fSmrg alu->op == ALU_OP0_SET_CF_IDX1) 89501e04c3fSmrg return 0; /* data hazard with MOVA */ 89601e04c3fSmrg 8973464ebd5Sriastradh /* Let's check source gprs */ 89801e04c3fSmrg num_src = r600_bytecode_get_num_operands(alu); 8993464ebd5Sriastradh for (src = 0; src < num_src; ++src) { 9003464ebd5Sriastradh 9013464ebd5Sriastradh /* Constants don't matter. */ 9023464ebd5Sriastradh if (!is_gpr(alu->src[src].sel)) 9033464ebd5Sriastradh continue; 9043464ebd5Sriastradh 9053464ebd5Sriastradh for (j = 0; j < max_slots; ++j) { 90601e04c3fSmrg if (!prev[j] || !alu_writes(prev[j])) 9073464ebd5Sriastradh continue; 9083464ebd5Sriastradh 9093464ebd5Sriastradh /* If it's relative then we can't determin which gpr is really used. */ 9103464ebd5Sriastradh if (prev[j]->dst.chan == alu->src[src].chan && 9113464ebd5Sriastradh (prev[j]->dst.sel == alu->src[src].sel || 9123464ebd5Sriastradh prev[j]->dst.rel || alu->src[src].rel)) 9133464ebd5Sriastradh return 0; 9143464ebd5Sriastradh } 9153464ebd5Sriastradh } 9163464ebd5Sriastradh } 9173464ebd5Sriastradh 9183464ebd5Sriastradh /* more than one PRED_ or KILL_ ? */ 9193464ebd5Sriastradh if (num_once_inst > 1) 9203464ebd5Sriastradh return 0; 9213464ebd5Sriastradh 9223464ebd5Sriastradh /* check if the result can still be swizzlet */ 9233464ebd5Sriastradh r = check_and_set_bank_swizzle(bc, result); 9243464ebd5Sriastradh if (r) 9253464ebd5Sriastradh return 0; 9263464ebd5Sriastradh 9273464ebd5Sriastradh /* looks like everything worked out right, apply the changes */ 9283464ebd5Sriastradh 9293464ebd5Sriastradh /* undo adding previus literals */ 9303464ebd5Sriastradh bc->cf_last->ndw -= align(prev_nliteral, 2); 9313464ebd5Sriastradh 9323464ebd5Sriastradh /* sort instructions */ 9333464ebd5Sriastradh for (i = 0; i < max_slots; ++i) { 9343464ebd5Sriastradh slots[i] = result[i]; 9353464ebd5Sriastradh if (result[i]) { 9367ec681f3Smrg list_del(&result[i]->list); 9373464ebd5Sriastradh result[i]->last = 0; 9387ec681f3Smrg list_addtail(&result[i]->list, &bc->cf_last->alu); 9393464ebd5Sriastradh } 9403464ebd5Sriastradh } 9413464ebd5Sriastradh 9423464ebd5Sriastradh /* determine new last instruction */ 943af69d88dSmrg LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1; 9443464ebd5Sriastradh 9453464ebd5Sriastradh /* determine new first instruction */ 9463464ebd5Sriastradh for (i = 0; i < max_slots; ++i) { 9473464ebd5Sriastradh if (result[i]) { 9483464ebd5Sriastradh bc->cf_last->curr_bs_head = result[i]; 9493464ebd5Sriastradh break; 9503464ebd5Sriastradh } 9513464ebd5Sriastradh } 9523464ebd5Sriastradh 9533464ebd5Sriastradh bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head; 9543464ebd5Sriastradh bc->cf_last->prev2_bs_head = NULL; 9553464ebd5Sriastradh 9563464ebd5Sriastradh return 0; 9573464ebd5Sriastradh} 9583464ebd5Sriastradh 959af69d88dSmrg/* we'll keep kcache sets sorted by bank & addr */ 960af69d88dSmrgstatic int r600_bytecode_alloc_kcache_line(struct r600_bytecode *bc, 961af69d88dSmrg struct r600_bytecode_kcache *kcache, 96201e04c3fSmrg unsigned bank, unsigned line, unsigned index_mode) 9633464ebd5Sriastradh{ 964af69d88dSmrg int i, kcache_banks = bc->chip_class >= EVERGREEN ? 4 : 2; 9653464ebd5Sriastradh 966af69d88dSmrg for (i = 0; i < kcache_banks; i++) { 967af69d88dSmrg if (kcache[i].mode) { 968af69d88dSmrg int d; 9693464ebd5Sriastradh 970af69d88dSmrg if (kcache[i].bank < bank) 971af69d88dSmrg continue; 9723464ebd5Sriastradh 973af69d88dSmrg if ((kcache[i].bank == bank && kcache[i].addr > line+1) || 974af69d88dSmrg kcache[i].bank > bank) { 975af69d88dSmrg /* try to insert new line */ 976af69d88dSmrg if (kcache[kcache_banks-1].mode) { 977af69d88dSmrg /* all sets are in use */ 978af69d88dSmrg return -ENOMEM; 979af69d88dSmrg } 9803464ebd5Sriastradh 981af69d88dSmrg memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(struct r600_bytecode_kcache)); 982af69d88dSmrg kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1; 983af69d88dSmrg kcache[i].bank = bank; 984af69d88dSmrg kcache[i].addr = line; 98501e04c3fSmrg kcache[i].index_mode = index_mode; 986af69d88dSmrg return 0; 9873464ebd5Sriastradh } 9883464ebd5Sriastradh 989af69d88dSmrg d = line - kcache[i].addr; 990af69d88dSmrg 991af69d88dSmrg if (d == -1) { 992af69d88dSmrg kcache[i].addr--; 993af69d88dSmrg if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_2) { 994af69d88dSmrg /* we are prepending the line to the current set, 995af69d88dSmrg * discarding the existing second line, 996af69d88dSmrg * so we'll have to insert line+2 after it */ 997af69d88dSmrg line += 2; 998af69d88dSmrg continue; 999af69d88dSmrg } else if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_1) { 1000af69d88dSmrg kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2; 1001af69d88dSmrg return 0; 1002af69d88dSmrg } else { 1003af69d88dSmrg /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */ 1004af69d88dSmrg return -ENOMEM; 1005af69d88dSmrg } 1006af69d88dSmrg } else if (d == 1) { 1007af69d88dSmrg kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2; 1008af69d88dSmrg return 0; 1009af69d88dSmrg } else if (d == 0) 1010af69d88dSmrg return 0; 1011af69d88dSmrg } else { /* free kcache set - use it */ 1012af69d88dSmrg kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1; 1013af69d88dSmrg kcache[i].bank = bank; 1014af69d88dSmrg kcache[i].addr = line; 101501e04c3fSmrg kcache[i].index_mode = index_mode; 1016af69d88dSmrg return 0; 1017af69d88dSmrg } 10183464ebd5Sriastradh } 1019af69d88dSmrg return -ENOMEM; 1020af69d88dSmrg} 10213464ebd5Sriastradh 1022af69d88dSmrgstatic int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc, 1023af69d88dSmrg struct r600_bytecode_kcache *kcache, 1024af69d88dSmrg struct r600_bytecode_alu *alu) 1025af69d88dSmrg{ 1026af69d88dSmrg int i, r; 10273464ebd5Sriastradh 1028af69d88dSmrg for (i = 0; i < 3; i++) { 102901e04c3fSmrg unsigned bank, line, sel = alu->src[i].sel, index_mode; 1030af69d88dSmrg 1031af69d88dSmrg if (sel < 512) 1032af69d88dSmrg continue; 1033af69d88dSmrg 1034af69d88dSmrg bank = alu->src[i].kc_bank; 103501e04c3fSmrg assert(bank < R600_MAX_HW_CONST_BUFFERS); 1036af69d88dSmrg line = (sel-512)>>4; 103701e04c3fSmrg index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE 1038af69d88dSmrg 103901e04c3fSmrg if ((r = r600_bytecode_alloc_kcache_line(bc, kcache, bank, line, index_mode))) 1040af69d88dSmrg return r; 10413464ebd5Sriastradh } 1042af69d88dSmrg return 0; 1043af69d88dSmrg} 10443464ebd5Sriastradh 104501e04c3fSmrgstatic int r600_bytecode_assign_kcache_banks( 1046af69d88dSmrg struct r600_bytecode_alu *alu, 1047af69d88dSmrg struct r600_bytecode_kcache * kcache) 1048af69d88dSmrg{ 1049af69d88dSmrg int i, j; 1050af69d88dSmrg 1051af69d88dSmrg /* Alter the src operands to refer to the kcache. */ 1052af69d88dSmrg for (i = 0; i < 3; ++i) { 1053af69d88dSmrg static const unsigned int base[] = {128, 160, 256, 288}; 1054af69d88dSmrg unsigned int line, sel = alu->src[i].sel, found = 0; 1055af69d88dSmrg 1056af69d88dSmrg if (sel < 512) 1057af69d88dSmrg continue; 1058af69d88dSmrg 1059af69d88dSmrg sel -= 512; 1060af69d88dSmrg line = sel>>4; 1061af69d88dSmrg 1062af69d88dSmrg for (j = 0; j < 4 && !found; ++j) { 1063af69d88dSmrg switch (kcache[j].mode) { 1064af69d88dSmrg case V_SQ_CF_KCACHE_NOP: 1065af69d88dSmrg case V_SQ_CF_KCACHE_LOCK_LOOP_INDEX: 1066af69d88dSmrg R600_ERR("unexpected kcache line mode\n"); 1067af69d88dSmrg return -ENOMEM; 1068af69d88dSmrg default: 1069af69d88dSmrg if (kcache[j].bank == alu->src[i].kc_bank && 1070af69d88dSmrg kcache[j].addr <= line && 1071af69d88dSmrg line < kcache[j].addr + kcache[j].mode) { 1072af69d88dSmrg alu->src[i].sel = sel - (kcache[j].addr<<4); 1073af69d88dSmrg alu->src[i].sel += base[j]; 1074af69d88dSmrg found=1; 1075af69d88dSmrg } 10763464ebd5Sriastradh } 10773464ebd5Sriastradh } 10783464ebd5Sriastradh } 1079af69d88dSmrg return 0; 1080af69d88dSmrg} 1081af69d88dSmrg 1082af69d88dSmrgstatic int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, 1083af69d88dSmrg struct r600_bytecode_alu *alu, 1084af69d88dSmrg unsigned type) 1085af69d88dSmrg{ 1086af69d88dSmrg struct r600_bytecode_kcache kcache_sets[4]; 1087af69d88dSmrg struct r600_bytecode_kcache *kcache = kcache_sets; 1088af69d88dSmrg int r; 1089af69d88dSmrg 1090af69d88dSmrg memcpy(kcache, bc->cf_last->kcache, 4 * sizeof(struct r600_bytecode_kcache)); 10913464ebd5Sriastradh 1092af69d88dSmrg if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) { 1093af69d88dSmrg /* can't alloc, need to start new clause */ 1094af69d88dSmrg if ((r = r600_bytecode_add_cf(bc))) { 10953464ebd5Sriastradh return r; 10963464ebd5Sriastradh } 1097af69d88dSmrg bc->cf_last->op = type; 1098af69d88dSmrg 1099af69d88dSmrg /* retry with the new clause */ 11003464ebd5Sriastradh kcache = bc->cf_last->kcache; 1101af69d88dSmrg if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) { 1102af69d88dSmrg /* can't alloc again- should never happen */ 1103af69d88dSmrg return r; 1104af69d88dSmrg } 1105af69d88dSmrg } else { 1106af69d88dSmrg /* update kcache sets */ 1107af69d88dSmrg memcpy(bc->cf_last->kcache, kcache, 4 * sizeof(struct r600_bytecode_kcache)); 11083464ebd5Sriastradh } 11093464ebd5Sriastradh 111001e04c3fSmrg /* if we actually used more than 2 kcache sets, or have relative indexing - use ALU_EXTENDED on eg+ */ 111101e04c3fSmrg if (kcache[2].mode != V_SQ_CF_KCACHE_NOP || 111201e04c3fSmrg kcache[0].index_mode || kcache[1].index_mode || kcache[2].index_mode || kcache[3].index_mode) { 1113af69d88dSmrg if (bc->chip_class < EVERGREEN) 1114af69d88dSmrg return -ENOMEM; 1115af69d88dSmrg bc->cf_last->eg_alu_extended = 1; 1116af69d88dSmrg } 11173464ebd5Sriastradh 1118af69d88dSmrg return 0; 1119af69d88dSmrg} 11203464ebd5Sriastradh 1121af69d88dSmrgstatic int insert_nop_r6xx(struct r600_bytecode *bc) 1122af69d88dSmrg{ 1123af69d88dSmrg struct r600_bytecode_alu alu; 1124af69d88dSmrg int r, i; 11253464ebd5Sriastradh 1126af69d88dSmrg for (i = 0; i < 4; i++) { 1127af69d88dSmrg memset(&alu, 0, sizeof(alu)); 1128af69d88dSmrg alu.op = ALU_OP0_NOP; 1129af69d88dSmrg alu.src[0].chan = i; 1130af69d88dSmrg alu.dst.chan = i; 1131af69d88dSmrg alu.last = (i == 3); 1132af69d88dSmrg r = r600_bytecode_add_alu(bc, &alu); 1133af69d88dSmrg if (r) 1134af69d88dSmrg return r; 11353464ebd5Sriastradh } 1136af69d88dSmrg return 0; 1137af69d88dSmrg} 11383464ebd5Sriastradh 1139af69d88dSmrg/* load AR register from gpr (bc->ar_reg) with MOVA_INT */ 1140af69d88dSmrgstatic int load_ar_r6xx(struct r600_bytecode *bc) 1141af69d88dSmrg{ 1142af69d88dSmrg struct r600_bytecode_alu alu; 1143af69d88dSmrg int r; 11443464ebd5Sriastradh 1145af69d88dSmrg if (bc->ar_loaded) 1146af69d88dSmrg return 0; 11473464ebd5Sriastradh 1148af69d88dSmrg /* hack to avoid making MOVA the last instruction in the clause */ 1149af69d88dSmrg if ((bc->cf_last->ndw>>1) >= 110) 1150af69d88dSmrg bc->force_add_cf = 1; 11513464ebd5Sriastradh 1152af69d88dSmrg memset(&alu, 0, sizeof(alu)); 1153af69d88dSmrg alu.op = ALU_OP1_MOVA_GPR_INT; 1154af69d88dSmrg alu.src[0].sel = bc->ar_reg; 1155af69d88dSmrg alu.src[0].chan = bc->ar_chan; 1156af69d88dSmrg alu.last = 1; 1157af69d88dSmrg alu.index_mode = INDEX_MODE_LOOP; 1158af69d88dSmrg r = r600_bytecode_add_alu(bc, &alu); 1159af69d88dSmrg if (r) 1160af69d88dSmrg return r; 11613464ebd5Sriastradh 1162af69d88dSmrg /* no requirement to set uses waterfall on MOVA_GPR_INT */ 1163af69d88dSmrg bc->ar_loaded = 1; 11643464ebd5Sriastradh return 0; 11653464ebd5Sriastradh} 11663464ebd5Sriastradh 1167af69d88dSmrg/* load AR register from gpr (bc->ar_reg) with MOVA_INT */ 1168af69d88dSmrgstatic int load_ar(struct r600_bytecode *bc) 11693464ebd5Sriastradh{ 1170af69d88dSmrg struct r600_bytecode_alu alu; 1171af69d88dSmrg int r; 1172af69d88dSmrg 1173af69d88dSmrg if (bc->ar_handling) 1174af69d88dSmrg return load_ar_r6xx(bc); 1175af69d88dSmrg 1176af69d88dSmrg if (bc->ar_loaded) 1177af69d88dSmrg return 0; 1178af69d88dSmrg 1179af69d88dSmrg /* hack to avoid making MOVA the last instruction in the clause */ 1180af69d88dSmrg if ((bc->cf_last->ndw>>1) >= 110) 1181af69d88dSmrg bc->force_add_cf = 1; 1182af69d88dSmrg 1183af69d88dSmrg memset(&alu, 0, sizeof(alu)); 1184af69d88dSmrg alu.op = ALU_OP1_MOVA_INT; 1185af69d88dSmrg alu.src[0].sel = bc->ar_reg; 1186af69d88dSmrg alu.src[0].chan = bc->ar_chan; 1187af69d88dSmrg alu.last = 1; 1188af69d88dSmrg r = r600_bytecode_add_alu(bc, &alu); 1189af69d88dSmrg if (r) 1190af69d88dSmrg return r; 1191af69d88dSmrg 1192af69d88dSmrg bc->cf_last->r6xx_uses_waterfall = 1; 1193af69d88dSmrg bc->ar_loaded = 1; 1194af69d88dSmrg return 0; 1195af69d88dSmrg} 1196af69d88dSmrg 1197af69d88dSmrgint r600_bytecode_add_alu_type(struct r600_bytecode *bc, 1198af69d88dSmrg const struct r600_bytecode_alu *alu, unsigned type) 1199af69d88dSmrg{ 1200af69d88dSmrg struct r600_bytecode_alu *nalu = r600_bytecode_alu(); 1201af69d88dSmrg struct r600_bytecode_alu *lalu; 12023464ebd5Sriastradh int i, r; 12033464ebd5Sriastradh 120401e04c3fSmrg if (!nalu) 12053464ebd5Sriastradh return -ENOMEM; 1206af69d88dSmrg memcpy(nalu, alu, sizeof(struct r600_bytecode_alu)); 12073464ebd5Sriastradh 120801e04c3fSmrg if (alu->is_op3) { 120901e04c3fSmrg /* will fail later since alu does not support it. */ 121001e04c3fSmrg assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); 121101e04c3fSmrg } 121201e04c3fSmrg 1213af69d88dSmrg if (bc->cf_last != NULL && bc->cf_last->op != type) { 12143464ebd5Sriastradh /* check if we could add it anyway */ 1215af69d88dSmrg if (bc->cf_last->op == CF_OP_ALU && 1216af69d88dSmrg type == CF_OP_ALU_PUSH_BEFORE) { 12173464ebd5Sriastradh LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) { 1218af69d88dSmrg if (lalu->execute_mask) { 12193464ebd5Sriastradh bc->force_add_cf = 1; 12203464ebd5Sriastradh break; 12213464ebd5Sriastradh } 12223464ebd5Sriastradh } 12233464ebd5Sriastradh } else 12243464ebd5Sriastradh bc->force_add_cf = 1; 12253464ebd5Sriastradh } 12263464ebd5Sriastradh 12273464ebd5Sriastradh /* cf can contains only alu or only vtx or only tex */ 12283464ebd5Sriastradh if (bc->cf_last == NULL || bc->force_add_cf) { 1229af69d88dSmrg r = r600_bytecode_add_cf(bc); 12303464ebd5Sriastradh if (r) { 12313464ebd5Sriastradh free(nalu); 12323464ebd5Sriastradh return r; 12333464ebd5Sriastradh } 12343464ebd5Sriastradh } 1235af69d88dSmrg bc->cf_last->op = type; 1236af69d88dSmrg 123701e04c3fSmrg /* Load index register if required */ 123801e04c3fSmrg if (bc->chip_class >= EVERGREEN) { 123901e04c3fSmrg for (i = 0; i < 3; i++) 12407ec681f3Smrg if (nalu->src[i].kc_bank && nalu->src[i].kc_rel) 124101e04c3fSmrg egcm_load_index_reg(bc, 0, true); 124201e04c3fSmrg } 124301e04c3fSmrg 1244af69d88dSmrg /* Check AR usage and load it if required */ 1245af69d88dSmrg for (i = 0; i < 3; i++) 1246af69d88dSmrg if (nalu->src[i].rel && !bc->ar_loaded) 1247af69d88dSmrg load_ar(bc); 1248af69d88dSmrg 1249af69d88dSmrg if (nalu->dst.rel && !bc->ar_loaded) 1250af69d88dSmrg load_ar(bc); 12513464ebd5Sriastradh 12523464ebd5Sriastradh /* Setup the kcache for this ALU instruction. This will start a new 12533464ebd5Sriastradh * ALU clause if needed. */ 1254af69d88dSmrg if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) { 12553464ebd5Sriastradh free(nalu); 12563464ebd5Sriastradh return r; 12573464ebd5Sriastradh } 12583464ebd5Sriastradh 12593464ebd5Sriastradh if (!bc->cf_last->curr_bs_head) { 12603464ebd5Sriastradh bc->cf_last->curr_bs_head = nalu; 12613464ebd5Sriastradh } 12623464ebd5Sriastradh /* number of gpr == the last gpr used in any alu */ 12633464ebd5Sriastradh for (i = 0; i < 3; i++) { 12643464ebd5Sriastradh if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { 12653464ebd5Sriastradh bc->ngpr = nalu->src[i].sel + 1; 12663464ebd5Sriastradh } 12673464ebd5Sriastradh if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) 1268af69d88dSmrg r600_bytecode_special_constants(nalu->src[i].value, 12697ec681f3Smrg &nalu->src[i].sel); 12703464ebd5Sriastradh } 12713464ebd5Sriastradh if (nalu->dst.sel >= bc->ngpr) { 12723464ebd5Sriastradh bc->ngpr = nalu->dst.sel + 1; 12733464ebd5Sriastradh } 12747ec681f3Smrg list_addtail(&nalu->list, &bc->cf_last->alu); 12753464ebd5Sriastradh /* each alu use 2 dwords */ 12763464ebd5Sriastradh bc->cf_last->ndw += 2; 12773464ebd5Sriastradh bc->ndw += 2; 12783464ebd5Sriastradh 12793464ebd5Sriastradh /* process cur ALU instructions for bank swizzle */ 12803464ebd5Sriastradh if (nalu->last) { 12813464ebd5Sriastradh uint32_t literal[4]; 12823464ebd5Sriastradh unsigned nliteral; 1283af69d88dSmrg struct r600_bytecode_alu *slots[5]; 1284af69d88dSmrg int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 12853464ebd5Sriastradh r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); 12863464ebd5Sriastradh if (r) 12873464ebd5Sriastradh return r; 12883464ebd5Sriastradh 12893464ebd5Sriastradh if (bc->cf_last->prev_bs_head) { 12903464ebd5Sriastradh r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head); 12913464ebd5Sriastradh if (r) 12923464ebd5Sriastradh return r; 12933464ebd5Sriastradh } 12943464ebd5Sriastradh 12953464ebd5Sriastradh if (bc->cf_last->prev_bs_head) { 12963464ebd5Sriastradh r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head); 12973464ebd5Sriastradh if (r) 12983464ebd5Sriastradh return r; 12993464ebd5Sriastradh } 13003464ebd5Sriastradh 13013464ebd5Sriastradh r = check_and_set_bank_swizzle(bc, slots); 13023464ebd5Sriastradh if (r) 13033464ebd5Sriastradh return r; 13043464ebd5Sriastradh 13053464ebd5Sriastradh for (i = 0, nliteral = 0; i < max_slots; i++) { 13063464ebd5Sriastradh if (slots[i]) { 130701e04c3fSmrg r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral); 13083464ebd5Sriastradh if (r) 13093464ebd5Sriastradh return r; 13103464ebd5Sriastradh } 13113464ebd5Sriastradh } 13123464ebd5Sriastradh bc->cf_last->ndw += align(nliteral, 2); 13133464ebd5Sriastradh 13143464ebd5Sriastradh /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots) 13153464ebd5Sriastradh * worst case */ 13163464ebd5Sriastradh if ((bc->cf_last->ndw >> 1) >= 120) { 13173464ebd5Sriastradh bc->force_add_cf = 1; 13183464ebd5Sriastradh } 13193464ebd5Sriastradh 13203464ebd5Sriastradh bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head; 13213464ebd5Sriastradh bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head; 13223464ebd5Sriastradh bc->cf_last->curr_bs_head = NULL; 13233464ebd5Sriastradh } 1324af69d88dSmrg 1325af69d88dSmrg if (nalu->dst.rel && bc->r6xx_nop_after_rel_dst) 1326af69d88dSmrg insert_nop_r6xx(bc); 1327af69d88dSmrg 132801e04c3fSmrg /* Might need to insert spill write ops after current clause */ 132901e04c3fSmrg if (nalu->last && bc->n_pending_outputs) { 133001e04c3fSmrg while (bc->n_pending_outputs) { 133101e04c3fSmrg r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]); 133201e04c3fSmrg if (r) 133301e04c3fSmrg return r; 133401e04c3fSmrg } 133501e04c3fSmrg } 133601e04c3fSmrg 13373464ebd5Sriastradh return 0; 13383464ebd5Sriastradh} 13393464ebd5Sriastradh 1340af69d88dSmrgint r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu) 13413464ebd5Sriastradh{ 1342af69d88dSmrg return r600_bytecode_add_alu_type(bc, alu, CF_OP_ALU); 13433464ebd5Sriastradh} 13443464ebd5Sriastradh 1345af69d88dSmrgstatic unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc) 13463464ebd5Sriastradh{ 1347af69d88dSmrg switch (bc->chip_class) { 1348af69d88dSmrg case R600: 13493464ebd5Sriastradh return 8; 13503464ebd5Sriastradh 1351af69d88dSmrg case R700: 1352af69d88dSmrg case EVERGREEN: 1353af69d88dSmrg case CAYMAN: 13543464ebd5Sriastradh return 16; 13553464ebd5Sriastradh 13563464ebd5Sriastradh default: 1357af69d88dSmrg R600_ERR("Unknown chip class %d.\n", bc->chip_class); 13583464ebd5Sriastradh return 8; 13593464ebd5Sriastradh } 13603464ebd5Sriastradh} 13613464ebd5Sriastradh 1362af69d88dSmrgstatic inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc) 13633464ebd5Sriastradh{ 1364af69d88dSmrg return !((r600_isa_cf(bc->cf_last->op)->flags & CF_FETCH) && 136501e04c3fSmrg bc->cf_last->op != CF_OP_GDS && 136601e04c3fSmrg (bc->chip_class == CAYMAN || 136701e04c3fSmrg bc->cf_last->op != CF_OP_TEX)); 13683464ebd5Sriastradh} 13693464ebd5Sriastradh 137001e04c3fSmrgstatic int r600_bytecode_add_vtx_internal(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx, 137101e04c3fSmrg bool use_tc) 13723464ebd5Sriastradh{ 1373af69d88dSmrg struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx(); 13743464ebd5Sriastradh int r; 13753464ebd5Sriastradh 137601e04c3fSmrg if (!nvtx) 13773464ebd5Sriastradh return -ENOMEM; 1378af69d88dSmrg memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx)); 13793464ebd5Sriastradh 138001e04c3fSmrg /* Load index register if required */ 138101e04c3fSmrg if (bc->chip_class >= EVERGREEN) { 138201e04c3fSmrg if (vtx->buffer_index_mode) 138301e04c3fSmrg egcm_load_index_reg(bc, vtx->buffer_index_mode - 1, false); 138401e04c3fSmrg } 138501e04c3fSmrg 13863464ebd5Sriastradh /* cf can contains only alu or only vtx or only tex */ 13873464ebd5Sriastradh if (bc->cf_last == NULL || 1388af69d88dSmrg last_inst_was_not_vtx_fetch(bc) || 13893464ebd5Sriastradh bc->force_add_cf) { 1390af69d88dSmrg r = r600_bytecode_add_cf(bc); 13913464ebd5Sriastradh if (r) { 13923464ebd5Sriastradh free(nvtx); 13933464ebd5Sriastradh return r; 13943464ebd5Sriastradh } 1395af69d88dSmrg switch (bc->chip_class) { 1396af69d88dSmrg case R600: 1397af69d88dSmrg case R700: 1398af69d88dSmrg bc->cf_last->op = CF_OP_VTX; 1399af69d88dSmrg break; 140001e04c3fSmrg case EVERGREEN: 140101e04c3fSmrg if (use_tc) 140201e04c3fSmrg bc->cf_last->op = CF_OP_TEX; 140301e04c3fSmrg else 140401e04c3fSmrg bc->cf_last->op = CF_OP_VTX; 140501e04c3fSmrg break; 1406af69d88dSmrg case CAYMAN: 1407af69d88dSmrg bc->cf_last->op = CF_OP_TEX; 1408af69d88dSmrg break; 1409af69d88dSmrg default: 1410af69d88dSmrg R600_ERR("Unknown chip class %d.\n", bc->chip_class); 1411af69d88dSmrg free(nvtx); 1412af69d88dSmrg return -EINVAL; 1413af69d88dSmrg } 14143464ebd5Sriastradh } 14157ec681f3Smrg list_addtail(&nvtx->list, &bc->cf_last->vtx); 14163464ebd5Sriastradh /* each fetch use 4 dwords */ 14173464ebd5Sriastradh bc->cf_last->ndw += 4; 14183464ebd5Sriastradh bc->ndw += 4; 1419af69d88dSmrg if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 14203464ebd5Sriastradh bc->force_add_cf = 1; 1421af69d88dSmrg 1422af69d88dSmrg bc->ngpr = MAX2(bc->ngpr, vtx->src_gpr + 1); 1423af69d88dSmrg bc->ngpr = MAX2(bc->ngpr, vtx->dst_gpr + 1); 1424af69d88dSmrg 14253464ebd5Sriastradh return 0; 14263464ebd5Sriastradh} 14273464ebd5Sriastradh 142801e04c3fSmrgint r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) 142901e04c3fSmrg{ 143001e04c3fSmrg return r600_bytecode_add_vtx_internal(bc, vtx, false); 143101e04c3fSmrg} 143201e04c3fSmrg 143301e04c3fSmrgint r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) 143401e04c3fSmrg{ 143501e04c3fSmrg return r600_bytecode_add_vtx_internal(bc, vtx, true); 143601e04c3fSmrg} 143701e04c3fSmrg 1438af69d88dSmrgint r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex) 14393464ebd5Sriastradh{ 1440af69d88dSmrg struct r600_bytecode_tex *ntex = r600_bytecode_tex(); 14413464ebd5Sriastradh int r; 14423464ebd5Sriastradh 144301e04c3fSmrg if (!ntex) 14443464ebd5Sriastradh return -ENOMEM; 1445af69d88dSmrg memcpy(ntex, tex, sizeof(struct r600_bytecode_tex)); 14463464ebd5Sriastradh 144701e04c3fSmrg /* Load index register if required */ 144801e04c3fSmrg if (bc->chip_class >= EVERGREEN) { 144901e04c3fSmrg if (tex->sampler_index_mode || tex->resource_index_mode) 145001e04c3fSmrg egcm_load_index_reg(bc, 1, false); 145101e04c3fSmrg } 145201e04c3fSmrg 14533464ebd5Sriastradh /* we can't fetch data und use it as texture lookup address in the same TEX clause */ 14543464ebd5Sriastradh if (bc->cf_last != NULL && 1455af69d88dSmrg bc->cf_last->op == CF_OP_TEX) { 1456af69d88dSmrg struct r600_bytecode_tex *ttex; 14573464ebd5Sriastradh LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { 14587ec681f3Smrg if (ttex->dst_gpr == ntex->src_gpr && 14597ec681f3Smrg (ttex->dst_sel_x < 4 || ttex->dst_sel_y < 4 || 14607ec681f3Smrg ttex->dst_sel_z < 4 || ttex->dst_sel_w < 4)) { 14613464ebd5Sriastradh bc->force_add_cf = 1; 14623464ebd5Sriastradh break; 14633464ebd5Sriastradh } 14643464ebd5Sriastradh } 14653464ebd5Sriastradh /* slight hack to make gradients always go into same cf */ 1466af69d88dSmrg if (ntex->op == FETCH_OP_SET_GRADIENTS_H) 14673464ebd5Sriastradh bc->force_add_cf = 1; 14683464ebd5Sriastradh } 14693464ebd5Sriastradh 14703464ebd5Sriastradh /* cf can contains only alu or only vtx or only tex */ 14713464ebd5Sriastradh if (bc->cf_last == NULL || 1472af69d88dSmrg bc->cf_last->op != CF_OP_TEX || 14733464ebd5Sriastradh bc->force_add_cf) { 1474af69d88dSmrg r = r600_bytecode_add_cf(bc); 14753464ebd5Sriastradh if (r) { 14763464ebd5Sriastradh free(ntex); 14773464ebd5Sriastradh return r; 14783464ebd5Sriastradh } 1479af69d88dSmrg bc->cf_last->op = CF_OP_TEX; 14803464ebd5Sriastradh } 14813464ebd5Sriastradh if (ntex->src_gpr >= bc->ngpr) { 14823464ebd5Sriastradh bc->ngpr = ntex->src_gpr + 1; 14833464ebd5Sriastradh } 14843464ebd5Sriastradh if (ntex->dst_gpr >= bc->ngpr) { 14853464ebd5Sriastradh bc->ngpr = ntex->dst_gpr + 1; 14863464ebd5Sriastradh } 14877ec681f3Smrg list_addtail(&ntex->list, &bc->cf_last->tex); 14883464ebd5Sriastradh /* each texture fetch use 4 dwords */ 14893464ebd5Sriastradh bc->cf_last->ndw += 4; 14903464ebd5Sriastradh bc->ndw += 4; 1491af69d88dSmrg if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 14923464ebd5Sriastradh bc->force_add_cf = 1; 14933464ebd5Sriastradh return 0; 14943464ebd5Sriastradh} 14953464ebd5Sriastradh 149601e04c3fSmrgint r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds) 149701e04c3fSmrg{ 149801e04c3fSmrg struct r600_bytecode_gds *ngds = r600_bytecode_gds(); 149901e04c3fSmrg int r; 150001e04c3fSmrg 150101e04c3fSmrg if (ngds == NULL) 150201e04c3fSmrg return -ENOMEM; 150301e04c3fSmrg memcpy(ngds, gds, sizeof(struct r600_bytecode_gds)); 150401e04c3fSmrg 150501e04c3fSmrg if (bc->chip_class >= EVERGREEN) { 150601e04c3fSmrg if (gds->uav_index_mode) 150701e04c3fSmrg egcm_load_index_reg(bc, gds->uav_index_mode - 1, false); 150801e04c3fSmrg } 150901e04c3fSmrg 151001e04c3fSmrg if (bc->cf_last == NULL || 151101e04c3fSmrg bc->cf_last->op != CF_OP_GDS || 151201e04c3fSmrg bc->force_add_cf) { 151301e04c3fSmrg r = r600_bytecode_add_cf(bc); 151401e04c3fSmrg if (r) { 151501e04c3fSmrg free(ngds); 151601e04c3fSmrg return r; 151701e04c3fSmrg } 151801e04c3fSmrg bc->cf_last->op = CF_OP_GDS; 151901e04c3fSmrg } 152001e04c3fSmrg 15217ec681f3Smrg list_addtail(&ngds->list, &bc->cf_last->gds); 152201e04c3fSmrg bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */ 152301e04c3fSmrg if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 152401e04c3fSmrg bc->force_add_cf = 1; 152501e04c3fSmrg return 0; 152601e04c3fSmrg} 152701e04c3fSmrg 1528af69d88dSmrgint r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op) 15293464ebd5Sriastradh{ 15303464ebd5Sriastradh int r; 153101e04c3fSmrg 153201e04c3fSmrg /* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */ 153301e04c3fSmrg if (op != CF_OP_MEM_SCRATCH && bc->need_wait_ack) { 153401e04c3fSmrg bc->need_wait_ack = false; 153501e04c3fSmrg r = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK); 153601e04c3fSmrg } 153701e04c3fSmrg 1538af69d88dSmrg r = r600_bytecode_add_cf(bc); 15393464ebd5Sriastradh if (r) 15403464ebd5Sriastradh return r; 15413464ebd5Sriastradh 15423464ebd5Sriastradh bc->cf_last->cond = V_SQ_CF_COND_ACTIVE; 1543af69d88dSmrg bc->cf_last->op = op; 15443464ebd5Sriastradh return 0; 15453464ebd5Sriastradh} 15463464ebd5Sriastradh 1547af69d88dSmrgint cm_bytecode_add_cf_end(struct r600_bytecode *bc) 15483464ebd5Sriastradh{ 1549af69d88dSmrg return r600_bytecode_add_cfinst(bc, CF_OP_CF_END); 15503464ebd5Sriastradh} 15513464ebd5Sriastradh 15523464ebd5Sriastradh/* common to all 3 families */ 1553af69d88dSmrgstatic int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id) 15543464ebd5Sriastradh{ 155501e04c3fSmrg if (r600_isa_fetch(vtx->op)->flags & FF_MEM) 155601e04c3fSmrg return r700_bytecode_fetch_mem_build(bc, vtx, id); 155701e04c3fSmrg bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) | 155801e04c3fSmrg S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | 15593464ebd5Sriastradh S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | 15603464ebd5Sriastradh S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | 15613464ebd5Sriastradh S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); 1562af69d88dSmrg if (bc->chip_class < CAYMAN) 15633464ebd5Sriastradh bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); 15643464ebd5Sriastradh id++; 15653464ebd5Sriastradh bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | 15663464ebd5Sriastradh S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | 15673464ebd5Sriastradh S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | 15683464ebd5Sriastradh S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | 15693464ebd5Sriastradh S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) | 15703464ebd5Sriastradh S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) | 15713464ebd5Sriastradh S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) | 15723464ebd5Sriastradh S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | 15733464ebd5Sriastradh S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | 15743464ebd5Sriastradh S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); 15753464ebd5Sriastradh bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| 15763464ebd5Sriastradh S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); 157701e04c3fSmrg if (bc->chip_class >= EVERGREEN) 157801e04c3fSmrg bc->bytecode[id] |= ((vtx->buffer_index_mode & 0x3) << 21); // S_SQ_VTX_WORD2_BIM(vtx->buffer_index_mode); 1579af69d88dSmrg if (bc->chip_class < CAYMAN) 15803464ebd5Sriastradh bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); 15813464ebd5Sriastradh id++; 15823464ebd5Sriastradh bc->bytecode[id++] = 0; 15833464ebd5Sriastradh return 0; 15843464ebd5Sriastradh} 15853464ebd5Sriastradh 15863464ebd5Sriastradh/* common to all 3 families */ 1587af69d88dSmrgstatic int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) 15883464ebd5Sriastradh{ 158901e04c3fSmrg bc->bytecode[id] = S_SQ_TEX_WORD0_TEX_INST( 1590af69d88dSmrg r600_isa_fetch_opcode(bc->isa->hw_class, tex->op)) | 1591af69d88dSmrg EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) | 15923464ebd5Sriastradh S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | 15933464ebd5Sriastradh S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | 15943464ebd5Sriastradh S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); 159501e04c3fSmrg if (bc->chip_class >= EVERGREEN) 159601e04c3fSmrg bc->bytecode[id] |= ((tex->sampler_index_mode & 0x3) << 27) | // S_SQ_TEX_WORD0_SIM(tex->sampler_index_mode); 159701e04c3fSmrg ((tex->resource_index_mode & 0x3) << 25); // S_SQ_TEX_WORD0_RIM(tex->resource_index_mode) 159801e04c3fSmrg id++; 15993464ebd5Sriastradh bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) | 16003464ebd5Sriastradh S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) | 16013464ebd5Sriastradh S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) | 16023464ebd5Sriastradh S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) | 16033464ebd5Sriastradh S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) | 16043464ebd5Sriastradh S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) | 16053464ebd5Sriastradh S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) | 16063464ebd5Sriastradh S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) | 16073464ebd5Sriastradh S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) | 16083464ebd5Sriastradh S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) | 16093464ebd5Sriastradh S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w); 16103464ebd5Sriastradh bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) | 16113464ebd5Sriastradh S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) | 16123464ebd5Sriastradh S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) | 16133464ebd5Sriastradh S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) | 16143464ebd5Sriastradh S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) | 16153464ebd5Sriastradh S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) | 16163464ebd5Sriastradh S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) | 16173464ebd5Sriastradh S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w); 16183464ebd5Sriastradh bc->bytecode[id++] = 0; 16193464ebd5Sriastradh return 0; 16203464ebd5Sriastradh} 16213464ebd5Sriastradh 16223464ebd5Sriastradh/* r600 only, r700/eg bits in r700_asm.c */ 1623af69d88dSmrgstatic int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) 16243464ebd5Sriastradh{ 1625af69d88dSmrg unsigned opcode = r600_isa_alu_opcode(bc->isa->hw_class, alu->op); 1626af69d88dSmrg 16273464ebd5Sriastradh /* don't replace gpr by pv or ps for destination register */ 16283464ebd5Sriastradh bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | 16293464ebd5Sriastradh S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | 16303464ebd5Sriastradh S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | 16313464ebd5Sriastradh S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | 16323464ebd5Sriastradh S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | 16333464ebd5Sriastradh S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) | 16343464ebd5Sriastradh S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | 16353464ebd5Sriastradh S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | 1636af69d88dSmrg S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) | 1637af69d88dSmrg S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) | 16383464ebd5Sriastradh S_SQ_ALU_WORD0_LAST(alu->last); 16393464ebd5Sriastradh 16403464ebd5Sriastradh if (alu->is_op3) { 164101e04c3fSmrg assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); 16423464ebd5Sriastradh bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | 16433464ebd5Sriastradh S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | 16443464ebd5Sriastradh S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | 16453464ebd5Sriastradh S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | 16463464ebd5Sriastradh S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | 16473464ebd5Sriastradh S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) | 16483464ebd5Sriastradh S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | 16493464ebd5Sriastradh S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | 1650af69d88dSmrg S_SQ_ALU_WORD1_OP3_ALU_INST(opcode) | 16513464ebd5Sriastradh S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle); 16523464ebd5Sriastradh } else { 16533464ebd5Sriastradh bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | 16543464ebd5Sriastradh S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | 16553464ebd5Sriastradh S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | 16563464ebd5Sriastradh S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | 16573464ebd5Sriastradh S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | 16583464ebd5Sriastradh S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | 16593464ebd5Sriastradh S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | 16603464ebd5Sriastradh S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | 1661af69d88dSmrg S_SQ_ALU_WORD1_OP2_ALU_INST(opcode) | 16623464ebd5Sriastradh S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | 1663af69d88dSmrg S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) | 1664af69d88dSmrg S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred); 16653464ebd5Sriastradh } 16663464ebd5Sriastradh return 0; 16673464ebd5Sriastradh} 16683464ebd5Sriastradh 1669af69d88dSmrgstatic void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) 16703464ebd5Sriastradh{ 16713464ebd5Sriastradh *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); 1672af69d88dSmrg *bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) | 16733464ebd5Sriastradh S_SQ_CF_WORD1_BARRIER(1) | 167401e04c3fSmrg S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)| 167501e04c3fSmrg S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); 16763464ebd5Sriastradh} 16773464ebd5Sriastradh 16783464ebd5Sriastradh/* common for r600/r700 - eg in eg_asm.c */ 1679af69d88dSmrgstatic int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) 16803464ebd5Sriastradh{ 16813464ebd5Sriastradh unsigned id = cf->id; 1682af69d88dSmrg const struct cf_op_info *cfop = r600_isa_cf(cf->op); 1683af69d88dSmrg unsigned opcode = r600_isa_cf_opcode(bc->isa->hw_class, cf->op); 16843464ebd5Sriastradh 1685af69d88dSmrg 1686af69d88dSmrg if (cf->op == CF_NATIVE) { 1687af69d88dSmrg bc->bytecode[id++] = cf->isa[0]; 1688af69d88dSmrg bc->bytecode[id++] = cf->isa[1]; 1689af69d88dSmrg } else if (cfop->flags & CF_ALU) { 16903464ebd5Sriastradh bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | 16913464ebd5Sriastradh S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | 16923464ebd5Sriastradh S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | 16933464ebd5Sriastradh S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); 16943464ebd5Sriastradh 1695af69d88dSmrg bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(opcode) | 16963464ebd5Sriastradh S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | 16973464ebd5Sriastradh S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | 16983464ebd5Sriastradh S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | 16993464ebd5Sriastradh S_SQ_CF_ALU_WORD1_BARRIER(1) | 1700af69d88dSmrg S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chip_class == R600 ? cf->r6xx_uses_waterfall : 0) | 17013464ebd5Sriastradh S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); 1702af69d88dSmrg } else if (cfop->flags & CF_FETCH) { 1703af69d88dSmrg if (bc->chip_class == R700) 1704af69d88dSmrg r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf); 17053464ebd5Sriastradh else 1706af69d88dSmrg r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf); 1707af69d88dSmrg } else if (cfop->flags & CF_EXP) { 17083464ebd5Sriastradh bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | 17093464ebd5Sriastradh S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | 17103464ebd5Sriastradh S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | 1711af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) | 1712af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr); 17133464ebd5Sriastradh bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | 17143464ebd5Sriastradh S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | 17153464ebd5Sriastradh S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | 17163464ebd5Sriastradh S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | 17173464ebd5Sriastradh S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | 1718af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) | 1719af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) | 1720af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); 1721af69d88dSmrg } else if (cfop->flags & CF_MEM) { 1722af69d88dSmrg bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | 1723af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | 1724af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | 1725af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) | 1726af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr); 1727af69d88dSmrg bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | 1728af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) | 1729af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) | 1730af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) | 1731af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) | 1732af69d88dSmrg S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask); 1733af69d88dSmrg } else { 17343464ebd5Sriastradh bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); 1735af69d88dSmrg bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) | 17363464ebd5Sriastradh S_SQ_CF_WORD1_BARRIER(1) | 17373464ebd5Sriastradh S_SQ_CF_WORD1_COND(cf->cond) | 1738af69d88dSmrg S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) | 1739af69d88dSmrg S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); 17403464ebd5Sriastradh } 17413464ebd5Sriastradh return 0; 17423464ebd5Sriastradh} 17433464ebd5Sriastradh 1744af69d88dSmrgint r600_bytecode_build(struct r600_bytecode *bc) 17453464ebd5Sriastradh{ 1746af69d88dSmrg struct r600_bytecode_cf *cf; 1747af69d88dSmrg struct r600_bytecode_alu *alu; 1748af69d88dSmrg struct r600_bytecode_vtx *vtx; 1749af69d88dSmrg struct r600_bytecode_tex *tex; 175001e04c3fSmrg struct r600_bytecode_gds *gds; 17513464ebd5Sriastradh uint32_t literal[4]; 17523464ebd5Sriastradh unsigned nliteral; 17533464ebd5Sriastradh unsigned addr; 17543464ebd5Sriastradh int i, r; 17553464ebd5Sriastradh 175601e04c3fSmrg if (!bc->nstack) { // If not 0, Stack_size already provided by llvm 175701e04c3fSmrg if (bc->stack.max_entries) 175801e04c3fSmrg bc->nstack = bc->stack.max_entries; 175901e04c3fSmrg else if (bc->type == PIPE_SHADER_VERTEX || 176001e04c3fSmrg bc->type == PIPE_SHADER_TESS_EVAL || 176101e04c3fSmrg bc->type == PIPE_SHADER_TESS_CTRL) 176201e04c3fSmrg bc->nstack = 1; 17633464ebd5Sriastradh } 17643464ebd5Sriastradh 17653464ebd5Sriastradh /* first path compute addr of each CF block */ 17663464ebd5Sriastradh /* addr start after all the CF instructions */ 17673464ebd5Sriastradh addr = bc->cf_last->id + 2; 17683464ebd5Sriastradh LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 1769af69d88dSmrg if (r600_isa_cf(cf->op)->flags & CF_FETCH) { 17703464ebd5Sriastradh addr += 3; 17713464ebd5Sriastradh addr &= 0xFFFFFFFCUL; 17723464ebd5Sriastradh } 17733464ebd5Sriastradh cf->addr = addr; 17743464ebd5Sriastradh addr += cf->ndw; 17753464ebd5Sriastradh bc->ndw = cf->addr + cf->ndw; 17763464ebd5Sriastradh } 17773464ebd5Sriastradh free(bc->bytecode); 177801e04c3fSmrg bc->bytecode = calloc(4, bc->ndw); 17793464ebd5Sriastradh if (bc->bytecode == NULL) 17803464ebd5Sriastradh return -ENOMEM; 17813464ebd5Sriastradh LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 1782af69d88dSmrg const struct cf_op_info *cfop = r600_isa_cf(cf->op); 17833464ebd5Sriastradh addr = cf->addr; 1784af69d88dSmrg if (bc->chip_class >= EVERGREEN) 1785af69d88dSmrg r = eg_bytecode_cf_build(bc, cf); 17863464ebd5Sriastradh else 1787af69d88dSmrg r = r600_bytecode_cf_build(bc, cf); 17883464ebd5Sriastradh if (r) 17893464ebd5Sriastradh return r; 1790af69d88dSmrg if (cfop->flags & CF_ALU) { 17913464ebd5Sriastradh nliteral = 0; 17923464ebd5Sriastradh memset(literal, 0, sizeof(literal)); 17933464ebd5Sriastradh LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { 179401e04c3fSmrg r = r600_bytecode_alu_nliterals(alu, literal, &nliteral); 17953464ebd5Sriastradh if (r) 17963464ebd5Sriastradh return r; 179701e04c3fSmrg r600_bytecode_alu_adjust_literals(alu, literal, nliteral); 179801e04c3fSmrg r600_bytecode_assign_kcache_banks(alu, cf->kcache); 1799af69d88dSmrg 1800af69d88dSmrg switch(bc->chip_class) { 1801af69d88dSmrg case R600: 1802af69d88dSmrg r = r600_bytecode_alu_build(bc, alu, addr); 18033464ebd5Sriastradh break; 1804af69d88dSmrg case R700: 1805af69d88dSmrg r = r700_bytecode_alu_build(bc, alu, addr); 18063464ebd5Sriastradh break; 180701e04c3fSmrg case EVERGREEN: 180801e04c3fSmrg case CAYMAN: 180901e04c3fSmrg r = eg_bytecode_alu_build(bc, alu, addr); 181001e04c3fSmrg break; 18113464ebd5Sriastradh default: 1812af69d88dSmrg R600_ERR("unknown chip class %d.\n", bc->chip_class); 18133464ebd5Sriastradh return -EINVAL; 18143464ebd5Sriastradh } 18153464ebd5Sriastradh if (r) 18163464ebd5Sriastradh return r; 18173464ebd5Sriastradh addr += 2; 18183464ebd5Sriastradh if (alu->last) { 18193464ebd5Sriastradh for (i = 0; i < align(nliteral, 2); ++i) { 18203464ebd5Sriastradh bc->bytecode[addr++] = literal[i]; 18213464ebd5Sriastradh } 18223464ebd5Sriastradh nliteral = 0; 18233464ebd5Sriastradh memset(literal, 0, sizeof(literal)); 18243464ebd5Sriastradh } 18253464ebd5Sriastradh } 1826af69d88dSmrg } else if (cf->op == CF_OP_VTX) { 18273464ebd5Sriastradh LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 1828af69d88dSmrg r = r600_bytecode_vtx_build(bc, vtx, addr); 18293464ebd5Sriastradh if (r) 18303464ebd5Sriastradh return r; 18313464ebd5Sriastradh addr += 4; 18323464ebd5Sriastradh } 183301e04c3fSmrg } else if (cf->op == CF_OP_GDS) { 183401e04c3fSmrg assert(bc->chip_class >= EVERGREEN); 183501e04c3fSmrg LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) { 183601e04c3fSmrg r = eg_bytecode_gds_build(bc, gds, addr); 183701e04c3fSmrg if (r) 183801e04c3fSmrg return r; 183901e04c3fSmrg addr += 4; 184001e04c3fSmrg } 1841af69d88dSmrg } else if (cf->op == CF_OP_TEX) { 1842af69d88dSmrg LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 1843af69d88dSmrg assert(bc->chip_class >= EVERGREEN); 1844af69d88dSmrg r = r600_bytecode_vtx_build(bc, vtx, addr); 1845af69d88dSmrg if (r) 1846af69d88dSmrg return r; 1847af69d88dSmrg addr += 4; 18483464ebd5Sriastradh } 18493464ebd5Sriastradh LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { 1850af69d88dSmrg r = r600_bytecode_tex_build(bc, tex, addr); 18513464ebd5Sriastradh if (r) 18523464ebd5Sriastradh return r; 18533464ebd5Sriastradh addr += 4; 18543464ebd5Sriastradh } 18553464ebd5Sriastradh } 18563464ebd5Sriastradh } 18573464ebd5Sriastradh return 0; 18583464ebd5Sriastradh} 18593464ebd5Sriastradh 1860af69d88dSmrgvoid r600_bytecode_clear(struct r600_bytecode *bc) 18613464ebd5Sriastradh{ 1862af69d88dSmrg struct r600_bytecode_cf *cf = NULL, *next_cf; 18633464ebd5Sriastradh 18643464ebd5Sriastradh free(bc->bytecode); 18653464ebd5Sriastradh bc->bytecode = NULL; 18663464ebd5Sriastradh 18673464ebd5Sriastradh LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { 1868af69d88dSmrg struct r600_bytecode_alu *alu = NULL, *next_alu; 1869af69d88dSmrg struct r600_bytecode_tex *tex = NULL, *next_tex; 1870af69d88dSmrg struct r600_bytecode_tex *vtx = NULL, *next_vtx; 187101e04c3fSmrg struct r600_bytecode_gds *gds = NULL, *next_gds; 18723464ebd5Sriastradh 18733464ebd5Sriastradh LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { 18743464ebd5Sriastradh free(alu); 18753464ebd5Sriastradh } 18763464ebd5Sriastradh 18777ec681f3Smrg list_inithead(&cf->alu); 18783464ebd5Sriastradh 18793464ebd5Sriastradh LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) { 18803464ebd5Sriastradh free(tex); 18813464ebd5Sriastradh } 18823464ebd5Sriastradh 18837ec681f3Smrg list_inithead(&cf->tex); 18843464ebd5Sriastradh 18853464ebd5Sriastradh LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) { 18863464ebd5Sriastradh free(vtx); 18873464ebd5Sriastradh } 18883464ebd5Sriastradh 18897ec681f3Smrg list_inithead(&cf->vtx); 18903464ebd5Sriastradh 189101e04c3fSmrg LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) { 189201e04c3fSmrg free(gds); 189301e04c3fSmrg } 189401e04c3fSmrg 18957ec681f3Smrg list_inithead(&cf->gds); 189601e04c3fSmrg 18973464ebd5Sriastradh free(cf); 18983464ebd5Sriastradh } 18993464ebd5Sriastradh 19007ec681f3Smrg list_inithead(&cf->list); 19013464ebd5Sriastradh} 19023464ebd5Sriastradh 1903af69d88dSmrgstatic int print_swizzle(unsigned swz) 1904af69d88dSmrg{ 1905af69d88dSmrg const char * swzchars = "xyzw01?_"; 1906af69d88dSmrg assert(swz<8 && swz != 6); 1907af69d88dSmrg return fprintf(stderr, "%c", swzchars[swz]); 1908af69d88dSmrg} 1909af69d88dSmrg 1910af69d88dSmrgstatic int print_sel(unsigned sel, unsigned rel, unsigned index_mode, 1911af69d88dSmrg unsigned need_brackets) 1912af69d88dSmrg{ 1913af69d88dSmrg int o = 0; 1914af69d88dSmrg if (rel && index_mode >= 5 && sel < 128) 1915af69d88dSmrg o += fprintf(stderr, "G"); 1916af69d88dSmrg if (rel || need_brackets) { 1917af69d88dSmrg o += fprintf(stderr, "["); 1918af69d88dSmrg } 1919af69d88dSmrg o += fprintf(stderr, "%d", sel); 1920af69d88dSmrg if (rel) { 1921af69d88dSmrg if (index_mode == 0 || index_mode == 6) 1922af69d88dSmrg o += fprintf(stderr, "+AR"); 1923af69d88dSmrg else if (index_mode == 4) 1924af69d88dSmrg o += fprintf(stderr, "+AL"); 1925af69d88dSmrg } 1926af69d88dSmrg if (rel || need_brackets) { 1927af69d88dSmrg o += fprintf(stderr, "]"); 1928af69d88dSmrg } 1929af69d88dSmrg return o; 1930af69d88dSmrg} 1931af69d88dSmrg 1932af69d88dSmrgstatic int print_dst(struct r600_bytecode_alu *alu) 1933af69d88dSmrg{ 1934af69d88dSmrg int o = 0; 1935af69d88dSmrg unsigned sel = alu->dst.sel; 1936af69d88dSmrg char reg_char = 'R'; 1937af69d88dSmrg if (sel > 128 - 4) { /* clause temporary gpr */ 1938af69d88dSmrg sel -= 128 - 4; 1939af69d88dSmrg reg_char = 'T'; 1940af69d88dSmrg } 1941af69d88dSmrg 194201e04c3fSmrg if (alu_writes(alu)) { 1943af69d88dSmrg o += fprintf(stderr, "%c", reg_char); 1944af69d88dSmrg o += print_sel(alu->dst.sel, alu->dst.rel, alu->index_mode, 0); 1945af69d88dSmrg } else { 1946af69d88dSmrg o += fprintf(stderr, "__"); 1947af69d88dSmrg } 1948af69d88dSmrg o += fprintf(stderr, "."); 1949af69d88dSmrg o += print_swizzle(alu->dst.chan); 1950af69d88dSmrg return o; 1951af69d88dSmrg} 1952af69d88dSmrg 1953af69d88dSmrgstatic int print_src(struct r600_bytecode_alu *alu, unsigned idx) 19543464ebd5Sriastradh{ 1955af69d88dSmrg int o = 0; 1956af69d88dSmrg struct r600_bytecode_alu_src *src = &alu->src[idx]; 1957af69d88dSmrg unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0; 1958af69d88dSmrg 1959af69d88dSmrg if (src->neg) 1960af69d88dSmrg o += fprintf(stderr,"-"); 1961af69d88dSmrg if (src->abs) 1962af69d88dSmrg o += fprintf(stderr,"|"); 1963af69d88dSmrg 1964af69d88dSmrg if (sel < 128 - 4) { 1965af69d88dSmrg o += fprintf(stderr, "R"); 1966af69d88dSmrg } else if (sel < 128) { 1967af69d88dSmrg o += fprintf(stderr, "T"); 1968af69d88dSmrg sel -= 128 - 4; 1969af69d88dSmrg } else if (sel < 160) { 1970af69d88dSmrg o += fprintf(stderr, "KC0"); 1971af69d88dSmrg need_brackets = 1; 1972af69d88dSmrg sel -= 128; 1973af69d88dSmrg } else if (sel < 192) { 1974af69d88dSmrg o += fprintf(stderr, "KC1"); 1975af69d88dSmrg need_brackets = 1; 1976af69d88dSmrg sel -= 160; 1977af69d88dSmrg } else if (sel >= 512) { 1978af69d88dSmrg o += fprintf(stderr, "C%d", src->kc_bank); 1979af69d88dSmrg need_brackets = 1; 1980af69d88dSmrg sel -= 512; 1981af69d88dSmrg } else if (sel >= 448) { 1982af69d88dSmrg o += fprintf(stderr, "Param"); 1983af69d88dSmrg sel -= 448; 1984af69d88dSmrg need_chan = 0; 1985af69d88dSmrg } else if (sel >= 288) { 1986af69d88dSmrg o += fprintf(stderr, "KC3"); 1987af69d88dSmrg need_brackets = 1; 1988af69d88dSmrg sel -= 288; 1989af69d88dSmrg } else if (sel >= 256) { 1990af69d88dSmrg o += fprintf(stderr, "KC2"); 1991af69d88dSmrg need_brackets = 1; 1992af69d88dSmrg sel -= 256; 1993af69d88dSmrg } else { 1994af69d88dSmrg need_sel = 0; 1995af69d88dSmrg need_chan = 0; 1996af69d88dSmrg switch (sel) { 199701e04c3fSmrg case EG_V_SQ_ALU_SRC_LDS_DIRECT_A: 199801e04c3fSmrg o += fprintf(stderr, "LDS_A[0x%08X]", src->value); 199901e04c3fSmrg break; 200001e04c3fSmrg case EG_V_SQ_ALU_SRC_LDS_DIRECT_B: 200101e04c3fSmrg o += fprintf(stderr, "LDS_B[0x%08X]", src->value); 200201e04c3fSmrg break; 200301e04c3fSmrg case EG_V_SQ_ALU_SRC_LDS_OQ_A: 200401e04c3fSmrg o += fprintf(stderr, "LDS_OQ_A"); 200501e04c3fSmrg need_chan = 1; 200601e04c3fSmrg break; 200701e04c3fSmrg case EG_V_SQ_ALU_SRC_LDS_OQ_B: 200801e04c3fSmrg o += fprintf(stderr, "LDS_OQ_B"); 200901e04c3fSmrg need_chan = 1; 201001e04c3fSmrg break; 201101e04c3fSmrg case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP: 201201e04c3fSmrg o += fprintf(stderr, "LDS_OQ_A_POP"); 201301e04c3fSmrg need_chan = 1; 201401e04c3fSmrg break; 201501e04c3fSmrg case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP: 201601e04c3fSmrg o += fprintf(stderr, "LDS_OQ_B_POP"); 201701e04c3fSmrg need_chan = 1; 201801e04c3fSmrg break; 201901e04c3fSmrg case EG_V_SQ_ALU_SRC_TIME_LO: 202001e04c3fSmrg o += fprintf(stderr, "TIME_LO"); 202101e04c3fSmrg break; 202201e04c3fSmrg case EG_V_SQ_ALU_SRC_TIME_HI: 202301e04c3fSmrg o += fprintf(stderr, "TIME_HI"); 202401e04c3fSmrg break; 202501e04c3fSmrg case EG_V_SQ_ALU_SRC_SE_ID: 202601e04c3fSmrg o += fprintf(stderr, "SE_ID"); 202701e04c3fSmrg break; 202801e04c3fSmrg case EG_V_SQ_ALU_SRC_SIMD_ID: 202901e04c3fSmrg o += fprintf(stderr, "SIMD_ID"); 203001e04c3fSmrg break; 203101e04c3fSmrg case EG_V_SQ_ALU_SRC_HW_WAVE_ID: 203201e04c3fSmrg o += fprintf(stderr, "HW_WAVE_ID"); 203301e04c3fSmrg break; 2034af69d88dSmrg case V_SQ_ALU_SRC_PS: 2035af69d88dSmrg o += fprintf(stderr, "PS"); 2036af69d88dSmrg break; 2037af69d88dSmrg case V_SQ_ALU_SRC_PV: 2038af69d88dSmrg o += fprintf(stderr, "PV"); 2039af69d88dSmrg need_chan = 1; 2040af69d88dSmrg break; 2041af69d88dSmrg case V_SQ_ALU_SRC_LITERAL: 204201e04c3fSmrg o += fprintf(stderr, "[0x%08X %f]", src->value, u_bitcast_u2f(src->value)); 2043af69d88dSmrg break; 2044af69d88dSmrg case V_SQ_ALU_SRC_0_5: 2045af69d88dSmrg o += fprintf(stderr, "0.5"); 2046af69d88dSmrg break; 2047af69d88dSmrg case V_SQ_ALU_SRC_M_1_INT: 2048af69d88dSmrg o += fprintf(stderr, "-1"); 2049af69d88dSmrg break; 2050af69d88dSmrg case V_SQ_ALU_SRC_1_INT: 2051af69d88dSmrg o += fprintf(stderr, "1"); 2052af69d88dSmrg break; 2053af69d88dSmrg case V_SQ_ALU_SRC_1: 2054af69d88dSmrg o += fprintf(stderr, "1.0"); 2055af69d88dSmrg break; 2056af69d88dSmrg case V_SQ_ALU_SRC_0: 2057af69d88dSmrg o += fprintf(stderr, "0"); 2058af69d88dSmrg break; 2059af69d88dSmrg default: 2060af69d88dSmrg o += fprintf(stderr, "??IMM_%d", sel); 2061af69d88dSmrg break; 2062af69d88dSmrg } 2063af69d88dSmrg } 20643464ebd5Sriastradh 2065af69d88dSmrg if (need_sel) 2066af69d88dSmrg o += print_sel(sel, src->rel, alu->index_mode, need_brackets); 2067af69d88dSmrg 2068af69d88dSmrg if (need_chan) { 2069af69d88dSmrg o += fprintf(stderr, "."); 2070af69d88dSmrg o += print_swizzle(src->chan); 2071af69d88dSmrg } 2072af69d88dSmrg 2073af69d88dSmrg if (src->abs) 2074af69d88dSmrg o += fprintf(stderr,"|"); 2075af69d88dSmrg 2076af69d88dSmrg return o; 2077af69d88dSmrg} 2078af69d88dSmrg 2079af69d88dSmrgstatic int print_indent(int p, int c) 2080af69d88dSmrg{ 2081af69d88dSmrg int o = 0; 2082af69d88dSmrg while (p++ < c) 2083af69d88dSmrg o += fprintf(stderr, " "); 2084af69d88dSmrg return o; 2085af69d88dSmrg} 2086af69d88dSmrg 2087af69d88dSmrgvoid r600_bytecode_disasm(struct r600_bytecode *bc) 2088af69d88dSmrg{ 208901e04c3fSmrg const char *index_mode[] = {"CF_INDEX_NONE", "CF_INDEX_0", "CF_INDEX_1"}; 2090af69d88dSmrg static int index = 0; 2091af69d88dSmrg struct r600_bytecode_cf *cf = NULL; 2092af69d88dSmrg struct r600_bytecode_alu *alu = NULL; 2093af69d88dSmrg struct r600_bytecode_vtx *vtx = NULL; 2094af69d88dSmrg struct r600_bytecode_tex *tex = NULL; 209501e04c3fSmrg struct r600_bytecode_gds *gds = NULL; 2096af69d88dSmrg 2097af69d88dSmrg unsigned i, id, ngr = 0, last; 20983464ebd5Sriastradh uint32_t literal[4]; 20993464ebd5Sriastradh unsigned nliteral; 21003464ebd5Sriastradh char chip = '6'; 21013464ebd5Sriastradh 2102af69d88dSmrg switch (bc->chip_class) { 2103af69d88dSmrg case R700: 21043464ebd5Sriastradh chip = '7'; 21053464ebd5Sriastradh break; 2106af69d88dSmrg case EVERGREEN: 21073464ebd5Sriastradh chip = 'E'; 21083464ebd5Sriastradh break; 2109af69d88dSmrg case CAYMAN: 21103464ebd5Sriastradh chip = 'C'; 21113464ebd5Sriastradh break; 2112af69d88dSmrg case R600: 21133464ebd5Sriastradh default: 21143464ebd5Sriastradh chip = '6'; 21153464ebd5Sriastradh break; 21163464ebd5Sriastradh } 2117af69d88dSmrg fprintf(stderr, "bytecode %d dw -- %d gprs -- %d nstack -------------\n", 2118af69d88dSmrg bc->ndw, bc->ngpr, bc->nstack); 2119af69d88dSmrg fprintf(stderr, "shader %d -- %c\n", index++, chip); 21203464ebd5Sriastradh 21213464ebd5Sriastradh LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 21223464ebd5Sriastradh id = cf->id; 2123af69d88dSmrg if (cf->op == CF_NATIVE) { 2124af69d88dSmrg fprintf(stderr, "%04d %08X %08X CF_NATIVE\n", id, bc->bytecode[id], 2125af69d88dSmrg bc->bytecode[id + 1]); 2126af69d88dSmrg } else { 2127af69d88dSmrg const struct cf_op_info *cfop = r600_isa_cf(cf->op); 2128af69d88dSmrg if (cfop->flags & CF_ALU) { 2129af69d88dSmrg if (cf->eg_alu_extended) { 2130af69d88dSmrg fprintf(stderr, "%04d %08X %08X %s\n", id, bc->bytecode[id], 2131af69d88dSmrg bc->bytecode[id + 1], "ALU_EXT"); 2132af69d88dSmrg id += 2; 2133af69d88dSmrg } 2134af69d88dSmrg fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id], 2135af69d88dSmrg bc->bytecode[id + 1], cfop->name); 2136af69d88dSmrg fprintf(stderr, "%d @%d ", cf->ndw / 2, cf->addr); 2137af69d88dSmrg for (i = 0; i < 4; ++i) { 2138af69d88dSmrg if (cf->kcache[i].mode) { 2139af69d88dSmrg int c_start = (cf->kcache[i].addr << 4); 2140af69d88dSmrg int c_end = c_start + (cf->kcache[i].mode << 4); 214101e04c3fSmrg fprintf(stderr, "KC%d[CB%d:%d-%d%s%s] ", 214201e04c3fSmrg i, cf->kcache[i].bank, c_start, c_end, 214301e04c3fSmrg cf->kcache[i].index_mode ? " " : "", 214401e04c3fSmrg cf->kcache[i].index_mode ? index_mode[cf->kcache[i].index_mode] : ""); 2145af69d88dSmrg } 2146af69d88dSmrg } 2147af69d88dSmrg fprintf(stderr, "\n"); 2148af69d88dSmrg } else if (cfop->flags & CF_FETCH) { 2149af69d88dSmrg fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id], 2150af69d88dSmrg bc->bytecode[id + 1], cfop->name); 2151af69d88dSmrg fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr); 215201e04c3fSmrg if (cf->vpm) 215301e04c3fSmrg fprintf(stderr, "VPM "); 215401e04c3fSmrg if (cf->end_of_program) 215501e04c3fSmrg fprintf(stderr, "EOP "); 2156af69d88dSmrg fprintf(stderr, "\n"); 215701e04c3fSmrg 2158af69d88dSmrg } else if (cfop->flags & CF_EXP) { 2159af69d88dSmrg int o = 0; 2160af69d88dSmrg const char *exp_type[] = {"PIXEL", "POS ", "PARAM"}; 2161af69d88dSmrg o += fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id], 2162af69d88dSmrg bc->bytecode[id + 1], cfop->name); 2163af69d88dSmrg o += print_indent(o, 43); 2164af69d88dSmrg o += fprintf(stderr, "%s ", exp_type[cf->output.type]); 2165af69d88dSmrg if (cf->output.burst_count > 1) { 2166af69d88dSmrg o += fprintf(stderr, "%d-%d ", cf->output.array_base, 2167af69d88dSmrg cf->output.array_base + cf->output.burst_count - 1); 2168af69d88dSmrg 2169af69d88dSmrg o += print_indent(o, 55); 2170af69d88dSmrg o += fprintf(stderr, "R%d-%d.", cf->output.gpr, 2171af69d88dSmrg cf->output.gpr + cf->output.burst_count - 1); 2172af69d88dSmrg } else { 2173af69d88dSmrg o += fprintf(stderr, "%d ", cf->output.array_base); 2174af69d88dSmrg o += print_indent(o, 55); 2175af69d88dSmrg o += fprintf(stderr, "R%d.", cf->output.gpr); 2176af69d88dSmrg } 21773464ebd5Sriastradh 2178af69d88dSmrg o += print_swizzle(cf->output.swizzle_x); 2179af69d88dSmrg o += print_swizzle(cf->output.swizzle_y); 2180af69d88dSmrg o += print_swizzle(cf->output.swizzle_z); 2181af69d88dSmrg o += print_swizzle(cf->output.swizzle_w); 2182af69d88dSmrg 2183af69d88dSmrg print_indent(o, 67); 2184af69d88dSmrg 2185af69d88dSmrg fprintf(stderr, " ES:%X ", cf->output.elem_size); 218601e04c3fSmrg if (cf->mark) 218701e04c3fSmrg fprintf(stderr, "MARK "); 2188af69d88dSmrg if (!cf->barrier) 2189af69d88dSmrg fprintf(stderr, "NO_BARRIER "); 2190af69d88dSmrg if (cf->end_of_program) 2191af69d88dSmrg fprintf(stderr, "EOP "); 2192af69d88dSmrg fprintf(stderr, "\n"); 2193af69d88dSmrg } else if (r600_isa_cf(cf->op)->flags & CF_MEM) { 2194af69d88dSmrg int o = 0; 2195af69d88dSmrg const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", 2196af69d88dSmrg "WRITE_IND_ACK"}; 2197af69d88dSmrg o += fprintf(stderr, "%04d %08X %08X %s ", id, 2198af69d88dSmrg bc->bytecode[id], bc->bytecode[id + 1], cfop->name); 2199af69d88dSmrg o += print_indent(o, 43); 2200af69d88dSmrg o += fprintf(stderr, "%s ", exp_type[cf->output.type]); 220101e04c3fSmrg 220201e04c3fSmrg if (r600_isa_cf(cf->op)->flags & CF_RAT) { 220301e04c3fSmrg o += fprintf(stderr, "RAT%d", cf->rat.id); 220401e04c3fSmrg if (cf->rat.index_mode) { 220501e04c3fSmrg o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1); 220601e04c3fSmrg } 220701e04c3fSmrg o += fprintf(stderr, " INST: %d ", cf->rat.inst); 220801e04c3fSmrg } 220901e04c3fSmrg 2210af69d88dSmrg if (cf->output.burst_count > 1) { 2211af69d88dSmrg o += fprintf(stderr, "%d-%d ", cf->output.array_base, 2212af69d88dSmrg cf->output.array_base + cf->output.burst_count - 1); 2213af69d88dSmrg o += print_indent(o, 55); 2214af69d88dSmrg o += fprintf(stderr, "R%d-%d.", cf->output.gpr, 2215af69d88dSmrg cf->output.gpr + cf->output.burst_count - 1); 2216af69d88dSmrg } else { 2217af69d88dSmrg o += fprintf(stderr, "%d ", cf->output.array_base); 2218af69d88dSmrg o += print_indent(o, 55); 2219af69d88dSmrg o += fprintf(stderr, "R%d.", cf->output.gpr); 2220af69d88dSmrg } 2221af69d88dSmrg for (i = 0; i < 4; ++i) { 2222af69d88dSmrg if (cf->output.comp_mask & (1 << i)) 2223af69d88dSmrg o += print_swizzle(i); 2224af69d88dSmrg else 2225af69d88dSmrg o += print_swizzle(7); 2226af69d88dSmrg } 2227af69d88dSmrg 222801e04c3fSmrg if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND || 222901e04c3fSmrg cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND) 2230af69d88dSmrg o += fprintf(stderr, " R%d", cf->output.index_gpr); 2231af69d88dSmrg 2232af69d88dSmrg o += print_indent(o, 67); 2233af69d88dSmrg 2234af69d88dSmrg fprintf(stderr, " ES:%i ", cf->output.elem_size); 2235af69d88dSmrg if (cf->output.array_size != 0xFFF) 2236af69d88dSmrg fprintf(stderr, "AS:%i ", cf->output.array_size); 223701e04c3fSmrg if (cf->mark) 223801e04c3fSmrg fprintf(stderr, "MARK "); 2239af69d88dSmrg if (!cf->barrier) 2240af69d88dSmrg fprintf(stderr, "NO_BARRIER "); 2241af69d88dSmrg if (cf->end_of_program) 2242af69d88dSmrg fprintf(stderr, "EOP "); 224301e04c3fSmrg 224401e04c3fSmrg if (cf->output.mark) 224501e04c3fSmrg fprintf(stderr, "MARK "); 224601e04c3fSmrg 2247af69d88dSmrg fprintf(stderr, "\n"); 2248af69d88dSmrg } else { 2249af69d88dSmrg fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id], 2250af69d88dSmrg bc->bytecode[id + 1], cfop->name); 2251af69d88dSmrg fprintf(stderr, "@%d ", cf->cf_addr); 2252af69d88dSmrg if (cf->cond) 2253af69d88dSmrg fprintf(stderr, "CND:%X ", cf->cond); 2254af69d88dSmrg if (cf->pop_count) 2255af69d88dSmrg fprintf(stderr, "POP:%X ", cf->pop_count); 225601e04c3fSmrg if (cf->count && (cfop->flags & CF_EMIT)) 225701e04c3fSmrg fprintf(stderr, "STREAM%d ", cf->count); 225801e04c3fSmrg if (cf->vpm) 225901e04c3fSmrg fprintf(stderr, "VPM "); 226001e04c3fSmrg if (cf->end_of_program) 226101e04c3fSmrg fprintf(stderr, "EOP "); 2262af69d88dSmrg fprintf(stderr, "\n"); 2263af69d88dSmrg } 22643464ebd5Sriastradh } 22653464ebd5Sriastradh 22663464ebd5Sriastradh id = cf->addr; 22673464ebd5Sriastradh nliteral = 0; 2268af69d88dSmrg last = 1; 22693464ebd5Sriastradh LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { 2270af69d88dSmrg const char *omod_str[] = {"","*2","*4","/2"}; 2271af69d88dSmrg const struct alu_op_info *aop = r600_isa_alu(alu->op); 2272af69d88dSmrg int o = 0; 2273af69d88dSmrg 227401e04c3fSmrg r600_bytecode_alu_nliterals(alu, literal, &nliteral); 2275af69d88dSmrg o += fprintf(stderr, " %04d %08X %08X ", id, bc->bytecode[id], bc->bytecode[id+1]); 2276af69d88dSmrg if (last) 2277af69d88dSmrg o += fprintf(stderr, "%4d ", ++ngr); 2278af69d88dSmrg else 2279af69d88dSmrg o += fprintf(stderr, " "); 2280af69d88dSmrg o += fprintf(stderr, "%c%c %c ", alu->execute_mask ? 'M':' ', 2281af69d88dSmrg alu->update_pred ? 'P':' ', 2282af69d88dSmrg alu->pred_sel ? alu->pred_sel==2 ? '0':'1':' '); 2283af69d88dSmrg 2284af69d88dSmrg o += fprintf(stderr, "%s%s%s ", aop->name, 2285af69d88dSmrg omod_str[alu->omod], alu->dst.clamp ? "_sat":""); 2286af69d88dSmrg 2287af69d88dSmrg o += print_indent(o,60); 2288af69d88dSmrg o += print_dst(alu); 2289af69d88dSmrg for (i = 0; i < aop->src_count; ++i) { 2290af69d88dSmrg o += fprintf(stderr, i == 0 ? ", ": ", "); 2291af69d88dSmrg o += print_src(alu, i); 2292af69d88dSmrg } 2293af69d88dSmrg 2294af69d88dSmrg if (alu->bank_swizzle) { 2295af69d88dSmrg o += print_indent(o,75); 2296af69d88dSmrg o += fprintf(stderr, " BS:%d", alu->bank_swizzle); 22973464ebd5Sriastradh } 22983464ebd5Sriastradh 2299af69d88dSmrg fprintf(stderr, "\n"); 2300af69d88dSmrg id += 2; 2301af69d88dSmrg 23023464ebd5Sriastradh if (alu->last) { 23033464ebd5Sriastradh for (i = 0; i < nliteral; i++, id++) { 23043464ebd5Sriastradh float *f = (float*)(bc->bytecode + id); 2305af69d88dSmrg o = fprintf(stderr, " %04d %08X", id, bc->bytecode[id]); 2306af69d88dSmrg print_indent(o, 60); 2307af69d88dSmrg fprintf(stderr, " %f (%d)\n", *f, *(bc->bytecode + id)); 23083464ebd5Sriastradh } 23093464ebd5Sriastradh id += nliteral & 1; 23103464ebd5Sriastradh nliteral = 0; 23113464ebd5Sriastradh } 2312af69d88dSmrg last = alu->last; 23133464ebd5Sriastradh } 23143464ebd5Sriastradh 23153464ebd5Sriastradh LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { 2316af69d88dSmrg int o = 0; 2317af69d88dSmrg o += fprintf(stderr, " %04d %08X %08X %08X ", id, bc->bytecode[id], 2318af69d88dSmrg bc->bytecode[id + 1], bc->bytecode[id + 2]); 2319af69d88dSmrg 2320af69d88dSmrg o += fprintf(stderr, "%s ", r600_isa_fetch(tex->op)->name); 2321af69d88dSmrg 2322af69d88dSmrg o += print_indent(o, 50); 2323af69d88dSmrg 2324af69d88dSmrg o += fprintf(stderr, "R%d.", tex->dst_gpr); 2325af69d88dSmrg o += print_swizzle(tex->dst_sel_x); 2326af69d88dSmrg o += print_swizzle(tex->dst_sel_y); 2327af69d88dSmrg o += print_swizzle(tex->dst_sel_z); 2328af69d88dSmrg o += print_swizzle(tex->dst_sel_w); 2329af69d88dSmrg 2330af69d88dSmrg o += fprintf(stderr, ", R%d.", tex->src_gpr); 2331af69d88dSmrg o += print_swizzle(tex->src_sel_x); 2332af69d88dSmrg o += print_swizzle(tex->src_sel_y); 2333af69d88dSmrg o += print_swizzle(tex->src_sel_z); 2334af69d88dSmrg o += print_swizzle(tex->src_sel_w); 2335af69d88dSmrg 2336af69d88dSmrg o += fprintf(stderr, ", RID:%d", tex->resource_id); 2337af69d88dSmrg o += fprintf(stderr, ", SID:%d ", tex->sampler_id); 2338af69d88dSmrg 233901e04c3fSmrg if (tex->sampler_index_mode) 234001e04c3fSmrg fprintf(stderr, "SQ_%s ", index_mode[tex->sampler_index_mode]); 234101e04c3fSmrg 2342af69d88dSmrg if (tex->lod_bias) 2343af69d88dSmrg fprintf(stderr, "LB:%d ", tex->lod_bias); 2344af69d88dSmrg 2345af69d88dSmrg fprintf(stderr, "CT:%c%c%c%c ", 2346af69d88dSmrg tex->coord_type_x ? 'N' : 'U', 2347af69d88dSmrg tex->coord_type_y ? 'N' : 'U', 2348af69d88dSmrg tex->coord_type_z ? 'N' : 'U', 2349af69d88dSmrg tex->coord_type_w ? 'N' : 'U'); 2350af69d88dSmrg 2351af69d88dSmrg if (tex->offset_x) 2352af69d88dSmrg fprintf(stderr, "OX:%d ", tex->offset_x); 2353af69d88dSmrg if (tex->offset_y) 2354af69d88dSmrg fprintf(stderr, "OY:%d ", tex->offset_y); 2355af69d88dSmrg if (tex->offset_z) 2356af69d88dSmrg fprintf(stderr, "OZ:%d ", tex->offset_z); 2357af69d88dSmrg 2358af69d88dSmrg id += 4; 2359af69d88dSmrg fprintf(stderr, "\n"); 23603464ebd5Sriastradh } 23613464ebd5Sriastradh 23623464ebd5Sriastradh LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 2363af69d88dSmrg int o = 0; 2364af69d88dSmrg const char * fetch_type[] = {"VERTEX", "INSTANCE", ""}; 2365af69d88dSmrg o += fprintf(stderr, " %04d %08X %08X %08X ", id, bc->bytecode[id], 2366af69d88dSmrg bc->bytecode[id + 1], bc->bytecode[id + 2]); 2367af69d88dSmrg 2368af69d88dSmrg o += fprintf(stderr, "%s ", r600_isa_fetch(vtx->op)->name); 2369af69d88dSmrg 2370af69d88dSmrg o += print_indent(o, 50); 2371af69d88dSmrg 2372af69d88dSmrg o += fprintf(stderr, "R%d.", vtx->dst_gpr); 2373af69d88dSmrg o += print_swizzle(vtx->dst_sel_x); 2374af69d88dSmrg o += print_swizzle(vtx->dst_sel_y); 2375af69d88dSmrg o += print_swizzle(vtx->dst_sel_z); 2376af69d88dSmrg o += print_swizzle(vtx->dst_sel_w); 2377af69d88dSmrg 2378af69d88dSmrg o += fprintf(stderr, ", R%d.", vtx->src_gpr); 2379af69d88dSmrg o += print_swizzle(vtx->src_sel_x); 238001e04c3fSmrg if (r600_isa_fetch(vtx->op)->flags & FF_MEM) 238101e04c3fSmrg o += print_swizzle(vtx->src_sel_y); 2382af69d88dSmrg 2383af69d88dSmrg if (vtx->offset) 2384af69d88dSmrg fprintf(stderr, " +%db", vtx->offset); 2385af69d88dSmrg 2386af69d88dSmrg o += print_indent(o, 55); 2387af69d88dSmrg 2388af69d88dSmrg fprintf(stderr, ", RID:%d ", vtx->buffer_id); 2389af69d88dSmrg 2390af69d88dSmrg fprintf(stderr, "%s ", fetch_type[vtx->fetch_type]); 2391af69d88dSmrg 2392af69d88dSmrg if (bc->chip_class < CAYMAN && vtx->mega_fetch_count) 2393af69d88dSmrg fprintf(stderr, "MFC:%d ", vtx->mega_fetch_count); 2394af69d88dSmrg 239501e04c3fSmrg if (bc->chip_class >= EVERGREEN && vtx->buffer_index_mode) 239601e04c3fSmrg fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]); 239701e04c3fSmrg 239801e04c3fSmrg if (r600_isa_fetch(vtx->op)->flags & FF_MEM) { 239901e04c3fSmrg if (vtx->uncached) 240001e04c3fSmrg fprintf(stderr, "UNCACHED "); 240101e04c3fSmrg if (vtx->indexed) 240201e04c3fSmrg fprintf(stderr, "INDEXED:%d ", vtx->indexed); 240301e04c3fSmrg 240401e04c3fSmrg fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size); 240501e04c3fSmrg if (vtx->burst_count) 240601e04c3fSmrg fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count); 240701e04c3fSmrg fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base); 240801e04c3fSmrg fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size); 240901e04c3fSmrg } 241001e04c3fSmrg 2411af69d88dSmrg fprintf(stderr, "UCF:%d ", vtx->use_const_fields); 2412af69d88dSmrg fprintf(stderr, "FMT(DTA:%d ", vtx->data_format); 24133464ebd5Sriastradh fprintf(stderr, "NUM:%d ", vtx->num_format_all); 24143464ebd5Sriastradh fprintf(stderr, "COMP:%d ", vtx->format_comp_all); 24153464ebd5Sriastradh fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); 2416af69d88dSmrg 2417af69d88dSmrg id += 4; 24183464ebd5Sriastradh } 241901e04c3fSmrg 242001e04c3fSmrg LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) { 242101e04c3fSmrg int o = 0; 242201e04c3fSmrg o += fprintf(stderr, " %04d %08X %08X %08X ", id, bc->bytecode[id], 242301e04c3fSmrg bc->bytecode[id + 1], bc->bytecode[id + 2]); 242401e04c3fSmrg 242501e04c3fSmrg o += fprintf(stderr, "%s ", r600_isa_fetch(gds->op)->name); 242601e04c3fSmrg 242701e04c3fSmrg if (gds->op != FETCH_OP_TF_WRITE) { 242801e04c3fSmrg o += fprintf(stderr, "R%d.", gds->dst_gpr); 242901e04c3fSmrg o += print_swizzle(gds->dst_sel_x); 243001e04c3fSmrg o += print_swizzle(gds->dst_sel_y); 243101e04c3fSmrg o += print_swizzle(gds->dst_sel_z); 243201e04c3fSmrg o += print_swizzle(gds->dst_sel_w); 243301e04c3fSmrg } 243401e04c3fSmrg 243501e04c3fSmrg o += fprintf(stderr, ", R%d.", gds->src_gpr); 243601e04c3fSmrg o += print_swizzle(gds->src_sel_x); 243701e04c3fSmrg o += print_swizzle(gds->src_sel_y); 243801e04c3fSmrg o += print_swizzle(gds->src_sel_z); 243901e04c3fSmrg 244001e04c3fSmrg if (gds->op != FETCH_OP_TF_WRITE) { 244101e04c3fSmrg o += fprintf(stderr, ", R%d.", gds->src_gpr2); 244201e04c3fSmrg } 244301e04c3fSmrg if (gds->alloc_consume) { 244401e04c3fSmrg o += fprintf(stderr, " UAV: %d", gds->uav_id); 244501e04c3fSmrg if (gds->uav_index_mode) 244601e04c3fSmrg o += fprintf(stderr, "[%s]", index_mode[gds->uav_index_mode]); 244701e04c3fSmrg } 244801e04c3fSmrg fprintf(stderr, "\n"); 244901e04c3fSmrg id += 4; 245001e04c3fSmrg } 24513464ebd5Sriastradh } 24523464ebd5Sriastradh 24533464ebd5Sriastradh fprintf(stderr, "--------------------------------------\n"); 24543464ebd5Sriastradh} 24553464ebd5Sriastradh 2456af69d88dSmrgvoid r600_vertex_data_type(enum pipe_format pformat, 2457af69d88dSmrg unsigned *format, 2458af69d88dSmrg unsigned *num_format, unsigned *format_comp, unsigned *endian) 24593464ebd5Sriastradh{ 24603464ebd5Sriastradh const struct util_format_description *desc; 24613464ebd5Sriastradh unsigned i; 24623464ebd5Sriastradh 24633464ebd5Sriastradh *format = 0; 24643464ebd5Sriastradh *num_format = 0; 24653464ebd5Sriastradh *format_comp = 0; 24663464ebd5Sriastradh *endian = ENDIAN_NONE; 24673464ebd5Sriastradh 2468af69d88dSmrg if (pformat == PIPE_FORMAT_R11G11B10_FLOAT) { 2469af69d88dSmrg *format = FMT_10_11_11_FLOAT; 2470af69d88dSmrg *endian = r600_endian_swap(32); 2471af69d88dSmrg return; 2472af69d88dSmrg } 2473af69d88dSmrg 247401e04c3fSmrg if (pformat == PIPE_FORMAT_B5G6R5_UNORM) { 247501e04c3fSmrg *format = FMT_5_6_5; 247601e04c3fSmrg *endian = r600_endian_swap(16); 247701e04c3fSmrg return; 247801e04c3fSmrg } 247901e04c3fSmrg 248001e04c3fSmrg if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) { 248101e04c3fSmrg *format = FMT_1_5_5_5; 248201e04c3fSmrg *endian = r600_endian_swap(16); 248301e04c3fSmrg return; 248401e04c3fSmrg } 248501e04c3fSmrg 248601e04c3fSmrg if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) { 248701e04c3fSmrg *format = FMT_5_5_5_1; 248801e04c3fSmrg return; 248901e04c3fSmrg } 249001e04c3fSmrg 24913464ebd5Sriastradh desc = util_format_description(pformat); 24923464ebd5Sriastradh if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { 24933464ebd5Sriastradh goto out_unknown; 24943464ebd5Sriastradh } 24953464ebd5Sriastradh 24963464ebd5Sriastradh /* Find the first non-VOID channel. */ 24973464ebd5Sriastradh for (i = 0; i < 4; i++) { 24983464ebd5Sriastradh if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 24993464ebd5Sriastradh break; 25003464ebd5Sriastradh } 25013464ebd5Sriastradh } 25023464ebd5Sriastradh 25033464ebd5Sriastradh *endian = r600_endian_swap(desc->channel[i].size); 25043464ebd5Sriastradh 25053464ebd5Sriastradh switch (desc->channel[i].type) { 25063464ebd5Sriastradh /* Half-floats, floats, ints */ 25073464ebd5Sriastradh case UTIL_FORMAT_TYPE_FLOAT: 25083464ebd5Sriastradh switch (desc->channel[i].size) { 25093464ebd5Sriastradh case 16: 25103464ebd5Sriastradh switch (desc->nr_channels) { 25113464ebd5Sriastradh case 1: 25123464ebd5Sriastradh *format = FMT_16_FLOAT; 25133464ebd5Sriastradh break; 25143464ebd5Sriastradh case 2: 25153464ebd5Sriastradh *format = FMT_16_16_FLOAT; 25163464ebd5Sriastradh break; 25173464ebd5Sriastradh case 3: 25183464ebd5Sriastradh case 4: 25193464ebd5Sriastradh *format = FMT_16_16_16_16_FLOAT; 25203464ebd5Sriastradh break; 25213464ebd5Sriastradh } 25223464ebd5Sriastradh break; 25233464ebd5Sriastradh case 32: 25243464ebd5Sriastradh switch (desc->nr_channels) { 25253464ebd5Sriastradh case 1: 25263464ebd5Sriastradh *format = FMT_32_FLOAT; 25273464ebd5Sriastradh break; 25283464ebd5Sriastradh case 2: 25293464ebd5Sriastradh *format = FMT_32_32_FLOAT; 25303464ebd5Sriastradh break; 25313464ebd5Sriastradh case 3: 25323464ebd5Sriastradh *format = FMT_32_32_32_FLOAT; 25333464ebd5Sriastradh break; 25343464ebd5Sriastradh case 4: 25353464ebd5Sriastradh *format = FMT_32_32_32_32_FLOAT; 25363464ebd5Sriastradh break; 25373464ebd5Sriastradh } 25383464ebd5Sriastradh break; 25393464ebd5Sriastradh default: 25403464ebd5Sriastradh goto out_unknown; 25413464ebd5Sriastradh } 25423464ebd5Sriastradh break; 25433464ebd5Sriastradh /* Unsigned ints */ 25443464ebd5Sriastradh case UTIL_FORMAT_TYPE_UNSIGNED: 25453464ebd5Sriastradh /* Signed ints */ 25463464ebd5Sriastradh case UTIL_FORMAT_TYPE_SIGNED: 25473464ebd5Sriastradh switch (desc->channel[i].size) { 254801e04c3fSmrg case 4: 254901e04c3fSmrg switch (desc->nr_channels) { 255001e04c3fSmrg case 2: 255101e04c3fSmrg *format = FMT_4_4; 255201e04c3fSmrg break; 255301e04c3fSmrg case 4: 255401e04c3fSmrg *format = FMT_4_4_4_4; 255501e04c3fSmrg break; 255601e04c3fSmrg } 255701e04c3fSmrg break; 25583464ebd5Sriastradh case 8: 25593464ebd5Sriastradh switch (desc->nr_channels) { 25603464ebd5Sriastradh case 1: 25613464ebd5Sriastradh *format = FMT_8; 25623464ebd5Sriastradh break; 25633464ebd5Sriastradh case 2: 25643464ebd5Sriastradh *format = FMT_8_8; 25653464ebd5Sriastradh break; 25663464ebd5Sriastradh case 3: 25673464ebd5Sriastradh case 4: 25683464ebd5Sriastradh *format = FMT_8_8_8_8; 25693464ebd5Sriastradh break; 25703464ebd5Sriastradh } 25713464ebd5Sriastradh break; 2572af69d88dSmrg case 10: 2573af69d88dSmrg if (desc->nr_channels != 4) 2574af69d88dSmrg goto out_unknown; 2575af69d88dSmrg 2576af69d88dSmrg *format = FMT_2_10_10_10; 2577af69d88dSmrg break; 25783464ebd5Sriastradh case 16: 25793464ebd5Sriastradh switch (desc->nr_channels) { 25803464ebd5Sriastradh case 1: 25813464ebd5Sriastradh *format = FMT_16; 25823464ebd5Sriastradh break; 25833464ebd5Sriastradh case 2: 25843464ebd5Sriastradh *format = FMT_16_16; 25853464ebd5Sriastradh break; 25863464ebd5Sriastradh case 3: 25873464ebd5Sriastradh case 4: 25883464ebd5Sriastradh *format = FMT_16_16_16_16; 25893464ebd5Sriastradh break; 25903464ebd5Sriastradh } 25913464ebd5Sriastradh break; 25923464ebd5Sriastradh case 32: 25933464ebd5Sriastradh switch (desc->nr_channels) { 25943464ebd5Sriastradh case 1: 25953464ebd5Sriastradh *format = FMT_32; 25963464ebd5Sriastradh break; 25973464ebd5Sriastradh case 2: 25983464ebd5Sriastradh *format = FMT_32_32; 25993464ebd5Sriastradh break; 26003464ebd5Sriastradh case 3: 26013464ebd5Sriastradh *format = FMT_32_32_32; 26023464ebd5Sriastradh break; 26033464ebd5Sriastradh case 4: 26043464ebd5Sriastradh *format = FMT_32_32_32_32; 26053464ebd5Sriastradh break; 26063464ebd5Sriastradh } 26073464ebd5Sriastradh break; 26083464ebd5Sriastradh default: 26093464ebd5Sriastradh goto out_unknown; 26103464ebd5Sriastradh } 26113464ebd5Sriastradh break; 26123464ebd5Sriastradh default: 26133464ebd5Sriastradh goto out_unknown; 26143464ebd5Sriastradh } 26153464ebd5Sriastradh 26163464ebd5Sriastradh if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 26173464ebd5Sriastradh *format_comp = 1; 26183464ebd5Sriastradh } 2619af69d88dSmrg 2620af69d88dSmrg *num_format = 0; 2621af69d88dSmrg if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || 2622af69d88dSmrg desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2623af69d88dSmrg if (!desc->channel[i].normalized) { 2624af69d88dSmrg if (desc->channel[i].pure_integer) 2625af69d88dSmrg *num_format = 1; 2626af69d88dSmrg else 2627af69d88dSmrg *num_format = 2; 2628af69d88dSmrg } 26293464ebd5Sriastradh } 26303464ebd5Sriastradh return; 26313464ebd5Sriastradhout_unknown: 26323464ebd5Sriastradh R600_ERR("unsupported vertex format %s\n", util_format_name(pformat)); 26333464ebd5Sriastradh} 26343464ebd5Sriastradh 2635af69d88dSmrgvoid *r600_create_vertex_fetch_shader(struct pipe_context *ctx, 2636af69d88dSmrg unsigned count, 2637af69d88dSmrg const struct pipe_vertex_element *elements) 26383464ebd5Sriastradh{ 2639af69d88dSmrg struct r600_context *rctx = (struct r600_context *)ctx; 2640af69d88dSmrg struct r600_bytecode bc; 2641af69d88dSmrg struct r600_bytecode_vtx vtx; 26423464ebd5Sriastradh const struct util_format_description *desc; 2643af69d88dSmrg unsigned fetch_resource_start = rctx->b.chip_class >= EVERGREEN ? 0 : 160; 26443464ebd5Sriastradh unsigned format, num_format, format_comp, endian; 2645af69d88dSmrg uint32_t *bytecode; 2646af69d88dSmrg int i, j, r, fs_size; 2647af69d88dSmrg struct r600_fetch_shader *shader; 26487ec681f3Smrg unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB || 26497ec681f3Smrg (rctx->screen->b.debug_flags & DBG_NIR); 2650af69d88dSmrg unsigned sb_disasm = !no_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); 26513464ebd5Sriastradh 2652af69d88dSmrg assert(count < 32); 26533464ebd5Sriastradh 26543464ebd5Sriastradh memset(&bc, 0, sizeof(bc)); 2655af69d88dSmrg r600_bytecode_init(&bc, rctx->b.chip_class, rctx->b.family, 2656af69d88dSmrg rctx->screen->has_compressed_msaa_texturing); 26573464ebd5Sriastradh 2658af69d88dSmrg bc.isa = rctx->isa; 26593464ebd5Sriastradh 2660af69d88dSmrg for (i = 0; i < count; i++) { 2661af69d88dSmrg if (elements[i].instance_divisor > 1) { 2662af69d88dSmrg if (rctx->b.chip_class == CAYMAN) { 2663af69d88dSmrg for (j = 0; j < 4; j++) { 2664af69d88dSmrg struct r600_bytecode_alu alu; 2665af69d88dSmrg memset(&alu, 0, sizeof(alu)); 2666af69d88dSmrg alu.op = ALU_OP2_MULHI_UINT; 2667af69d88dSmrg alu.src[0].sel = 0; 2668af69d88dSmrg alu.src[0].chan = 3; 2669af69d88dSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2670af69d88dSmrg alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; 2671af69d88dSmrg alu.dst.sel = i + 1; 2672af69d88dSmrg alu.dst.chan = j; 2673af69d88dSmrg alu.dst.write = j == 3; 2674af69d88dSmrg alu.last = j == 3; 2675af69d88dSmrg if ((r = r600_bytecode_add_alu(&bc, &alu))) { 2676af69d88dSmrg r600_bytecode_clear(&bc); 2677af69d88dSmrg return NULL; 2678af69d88dSmrg } 2679af69d88dSmrg } 2680af69d88dSmrg } else { 2681af69d88dSmrg struct r600_bytecode_alu alu; 2682af69d88dSmrg memset(&alu, 0, sizeof(alu)); 2683af69d88dSmrg alu.op = ALU_OP2_MULHI_UINT; 2684af69d88dSmrg alu.src[0].sel = 0; 2685af69d88dSmrg alu.src[0].chan = 3; 2686af69d88dSmrg alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2687af69d88dSmrg alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; 2688af69d88dSmrg alu.dst.sel = i + 1; 2689af69d88dSmrg alu.dst.chan = 3; 2690af69d88dSmrg alu.dst.write = 1; 2691af69d88dSmrg alu.last = 1; 2692af69d88dSmrg if ((r = r600_bytecode_add_alu(&bc, &alu))) { 2693af69d88dSmrg r600_bytecode_clear(&bc); 2694af69d88dSmrg return NULL; 2695af69d88dSmrg } 26963464ebd5Sriastradh } 26973464ebd5Sriastradh } 26983464ebd5Sriastradh } 26993464ebd5Sriastradh 2700af69d88dSmrg for (i = 0; i < count; i++) { 2701af69d88dSmrg r600_vertex_data_type(elements[i].src_format, 2702af69d88dSmrg &format, &num_format, &format_comp, &endian); 2703af69d88dSmrg 2704af69d88dSmrg desc = util_format_description(elements[i].src_format); 270501e04c3fSmrg if (!desc) { 2706af69d88dSmrg r600_bytecode_clear(&bc); 2707af69d88dSmrg R600_ERR("unknown format %d\n", elements[i].src_format); 2708af69d88dSmrg return NULL; 2709af69d88dSmrg } 2710af69d88dSmrg 2711af69d88dSmrg if (elements[i].src_offset > 65535) { 2712af69d88dSmrg r600_bytecode_clear(&bc); 2713af69d88dSmrg R600_ERR("too big src_offset: %u\n", elements[i].src_offset); 2714af69d88dSmrg return NULL; 27153464ebd5Sriastradh } 27163464ebd5Sriastradh 27173464ebd5Sriastradh memset(&vtx, 0, sizeof(vtx)); 2718af69d88dSmrg vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start; 271901e04c3fSmrg vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA; 27203464ebd5Sriastradh vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; 27213464ebd5Sriastradh vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; 27223464ebd5Sriastradh vtx.mega_fetch_count = 0x1F; 27233464ebd5Sriastradh vtx.dst_gpr = i + 1; 27243464ebd5Sriastradh vtx.dst_sel_x = desc->swizzle[0]; 27253464ebd5Sriastradh vtx.dst_sel_y = desc->swizzle[1]; 27263464ebd5Sriastradh vtx.dst_sel_z = desc->swizzle[2]; 27273464ebd5Sriastradh vtx.dst_sel_w = desc->swizzle[3]; 27283464ebd5Sriastradh vtx.data_format = format; 27293464ebd5Sriastradh vtx.num_format_all = num_format; 27303464ebd5Sriastradh vtx.format_comp_all = format_comp; 27313464ebd5Sriastradh vtx.offset = elements[i].src_offset; 27323464ebd5Sriastradh vtx.endian = endian; 27333464ebd5Sriastradh 2734af69d88dSmrg if ((r = r600_bytecode_add_vtx(&bc, &vtx))) { 2735af69d88dSmrg r600_bytecode_clear(&bc); 2736af69d88dSmrg return NULL; 27373464ebd5Sriastradh } 27383464ebd5Sriastradh } 27393464ebd5Sriastradh 2740af69d88dSmrg r600_bytecode_add_cfinst(&bc, CF_OP_RET); 27413464ebd5Sriastradh 2742af69d88dSmrg if ((r = r600_bytecode_build(&bc))) { 2743af69d88dSmrg r600_bytecode_clear(&bc); 2744af69d88dSmrg return NULL; 27453464ebd5Sriastradh } 27463464ebd5Sriastradh 2747af69d88dSmrg if (rctx->screen->b.debug_flags & DBG_FS) { 27483464ebd5Sriastradh fprintf(stderr, "--------------------------------------------------------------\n"); 2749af69d88dSmrg fprintf(stderr, "Vertex elements state:\n"); 2750af69d88dSmrg for (i = 0; i < count; i++) { 2751af69d88dSmrg fprintf(stderr, " "); 2752af69d88dSmrg util_dump_vertex_element(stderr, elements+i); 2753af69d88dSmrg fprintf(stderr, "\n"); 2754af69d88dSmrg } 2755af69d88dSmrg 2756af69d88dSmrg if (!sb_disasm) { 2757af69d88dSmrg r600_bytecode_disasm(&bc); 2758af69d88dSmrg 2759af69d88dSmrg fprintf(stderr, "______________________________________________________________\n"); 2760af69d88dSmrg } else { 2761af69d88dSmrg r600_sb_bytecode_process(rctx, &bc, NULL, 1 /*dump*/, 0 /*optimize*/); 2762af69d88dSmrg } 27633464ebd5Sriastradh } 27643464ebd5Sriastradh 2765af69d88dSmrg fs_size = bc.ndw*4; 27663464ebd5Sriastradh 2767af69d88dSmrg /* Allocate the CSO. */ 2768af69d88dSmrg shader = CALLOC_STRUCT(r600_fetch_shader); 2769af69d88dSmrg if (!shader) { 2770af69d88dSmrg r600_bytecode_clear(&bc); 2771af69d88dSmrg return NULL; 27723464ebd5Sriastradh } 27733464ebd5Sriastradh 27747ec681f3Smrg u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256, 277501e04c3fSmrg &shader->offset, 2776af69d88dSmrg (struct pipe_resource**)&shader->buffer); 2777af69d88dSmrg if (!shader->buffer) { 2778af69d88dSmrg r600_bytecode_clear(&bc); 2779af69d88dSmrg FREE(shader); 2780af69d88dSmrg return NULL; 27813464ebd5Sriastradh } 27823464ebd5Sriastradh 27839f464c52Smaya bytecode = r600_buffer_map_sync_with_rings 27849f464c52Smaya (&rctx->b, shader->buffer, 27857ec681f3Smrg PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY); 2786af69d88dSmrg bytecode += shader->offset / 4; 2787af69d88dSmrg 27883464ebd5Sriastradh if (R600_BIG_ENDIAN) { 2789af69d88dSmrg for (i = 0; i < fs_size / 4; ++i) { 2790af69d88dSmrg bytecode[i] = util_cpu_to_le32(bc.bytecode[i]); 27913464ebd5Sriastradh } 27923464ebd5Sriastradh } else { 2793af69d88dSmrg memcpy(bytecode, bc.bytecode, fs_size); 27943464ebd5Sriastradh } 27957ec681f3Smrg rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf); 27963464ebd5Sriastradh 2797af69d88dSmrg r600_bytecode_clear(&bc); 2798af69d88dSmrg return shader; 2799af69d88dSmrg} 28003464ebd5Sriastradh 2801af69d88dSmrgvoid r600_bytecode_alu_read(struct r600_bytecode *bc, 2802af69d88dSmrg struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1) 2803af69d88dSmrg{ 2804af69d88dSmrg /* WORD0 */ 2805af69d88dSmrg alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0); 2806af69d88dSmrg alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0); 2807af69d88dSmrg alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0); 2808af69d88dSmrg alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0); 2809af69d88dSmrg alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0); 2810af69d88dSmrg alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0); 2811af69d88dSmrg alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0); 2812af69d88dSmrg alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0); 2813af69d88dSmrg alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0); 2814af69d88dSmrg alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0); 2815af69d88dSmrg alu->last = G_SQ_ALU_WORD0_LAST(word0); 2816af69d88dSmrg 2817af69d88dSmrg /* WORD1 */ 2818af69d88dSmrg alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1); 2819af69d88dSmrg if (alu->bank_swizzle) 2820af69d88dSmrg alu->bank_swizzle_force = alu->bank_swizzle; 2821af69d88dSmrg alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1); 2822af69d88dSmrg alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1); 2823af69d88dSmrg alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1); 2824af69d88dSmrg alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1); 2825af69d88dSmrg if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/ 2826af69d88dSmrg { 2827af69d88dSmrg alu->is_op3 = 1; 2828af69d88dSmrg alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1); 2829af69d88dSmrg alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1); 2830af69d88dSmrg alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1); 2831af69d88dSmrg alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1); 2832af69d88dSmrg alu->op = r600_isa_alu_by_opcode(bc->isa, 2833af69d88dSmrg G_SQ_ALU_WORD1_OP3_ALU_INST(word1), /* is_op3 = */ 1); 2834af69d88dSmrg 2835af69d88dSmrg } 2836af69d88dSmrg else /*ALU_DWORD1_OP2*/ 2837af69d88dSmrg { 2838af69d88dSmrg alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1); 2839af69d88dSmrg alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1); 2840af69d88dSmrg alu->op = r600_isa_alu_by_opcode(bc->isa, 2841af69d88dSmrg G_SQ_ALU_WORD1_OP2_ALU_INST(word1), /* is_op3 = */ 0); 2842af69d88dSmrg alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1); 2843af69d88dSmrg alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1); 2844af69d88dSmrg alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1); 2845af69d88dSmrg alu->execute_mask = 2846af69d88dSmrg G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1); 2847af69d88dSmrg } 2848af69d88dSmrg} 28493464ebd5Sriastradh 2850af69d88dSmrg#if 0 2851af69d88dSmrgvoid r600_bytecode_export_read(struct r600_bytecode *bc, 2852af69d88dSmrg struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) 2853af69d88dSmrg{ 2854af69d88dSmrg output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); 2855af69d88dSmrg output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0); 2856af69d88dSmrg output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0); 2857af69d88dSmrg output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0); 2858af69d88dSmrg 2859af69d88dSmrg output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); 2860af69d88dSmrg output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1); 2861af69d88dSmrg output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1); 2862af69d88dSmrg output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1); 2863af69d88dSmrg output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); 2864af69d88dSmrg output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); 2865af69d88dSmrg output->op = r600_isa_cf_by_opcode(bc->isa, 2866af69d88dSmrg G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), 0); 2867af69d88dSmrg output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); 2868af69d88dSmrg output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1); 2869af69d88dSmrg output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1); 28703464ebd5Sriastradh} 2871af69d88dSmrg#endif 2872