1/* 2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "ir3.h" 25 26#include <stdlib.h> 27#include <stdio.h> 28#include <string.h> 29#include <assert.h> 30#include <stdbool.h> 31#include <errno.h> 32 33#include "util/bitscan.h" 34#include "util/ralloc.h" 35#include "util/u_math.h" 36 37#include "instr-a3xx.h" 38#include "ir3_compiler.h" 39 40/* simple allocator to carve allocations out of an up-front allocated heap, 41 * so that we can free everything easily in one shot. 42 */ 43void * ir3_alloc(struct ir3 *shader, int sz) 44{ 45 return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */ 46} 47 48struct ir3 * ir3_create(struct ir3_compiler *compiler, 49 gl_shader_stage type, unsigned nin, unsigned nout) 50{ 51 struct ir3 *shader = rzalloc(NULL, struct ir3); 52 53 shader->compiler = compiler; 54 shader->type = type; 55 shader->ninputs = nin; 56 shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin); 57 58 shader->noutputs = nout; 59 shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); 60 61 list_inithead(&shader->block_list); 62 list_inithead(&shader->array_list); 63 64 return shader; 65} 66 67void ir3_destroy(struct ir3 *shader) 68{ 69 ralloc_free(shader); 70} 71 72#define iassert(cond) do { \ 73 if (!(cond)) { \ 74 debug_assert(cond); \ 75 return -1; \ 76 } } while (0) 77 78#define iassert_type(reg, full) do { \ 79 if ((full)) { \ 80 iassert(!((reg)->flags & IR3_REG_HALF)); \ 81 } else { \ 82 iassert((reg)->flags & IR3_REG_HALF); \ 83 } } while (0); 84 85static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, 86 uint32_t repeat, uint32_t valid_flags) 87{ 88 reg_t val = { .dummy32 = 0 }; 89 90 if (reg->flags & ~valid_flags) { 91 debug_printf("INVALID FLAGS: %x vs %x\n", 92 reg->flags, valid_flags); 93 } 94 95 if (!(reg->flags & IR3_REG_R)) 96 repeat = 0; 97 98 if (reg->flags & IR3_REG_IMMED) { 99 val.iim_val = reg->iim_val; 100 } else { 101 unsigned components; 102 int16_t max; 103 104 if (reg->flags & IR3_REG_RELATIV) { 105 components = reg->size; 106 val.idummy10 = reg->array.offset; 107 max = (reg->array.offset + repeat + components - 1); 108 } else { 109 components = util_last_bit(reg->wrmask); 110 val.comp = reg->num & 0x3; 111 val.num = reg->num >> 2; 112 max = (reg->num + repeat + components - 1); 113 } 114 115 if (reg->flags & IR3_REG_CONST) { 116 info->max_const = MAX2(info->max_const, max >> 2); 117 } else if (val.num == 63) { 118 /* ignore writes to dummy register r63.x */ 119 } else if (max < regid(48, 0)) { 120 if (reg->flags & IR3_REG_HALF) { 121 if (info->gpu_id >= 600) { 122 /* starting w/ a6xx, half regs conflict with full regs: */ 123 info->max_reg = MAX2(info->max_reg, max >> 3); 124 } else { 125 info->max_half_reg = MAX2(info->max_half_reg, max >> 2); 126 } 127 } else { 128 info->max_reg = MAX2(info->max_reg, max >> 2); 129 } 130 } 131 } 132 133 return val.dummy32; 134} 135 136static int emit_cat0(struct ir3_instruction *instr, void *ptr, 137 struct ir3_info *info) 138{ 139 instr_cat0_t *cat0 = ptr; 140 141 if (info->gpu_id >= 500) { 142 cat0->a5xx.immed = instr->cat0.immed; 143 } else if (info->gpu_id >= 400) { 144 cat0->a4xx.immed = instr->cat0.immed; 145 } else { 146 cat0->a3xx.immed = instr->cat0.immed; 147 } 148 cat0->repeat = instr->repeat; 149 cat0->ss = !!(instr->flags & IR3_INSTR_SS); 150 cat0->inv = instr->cat0.inv; 151 cat0->comp = instr->cat0.comp; 152 cat0->opc = instr->opc; 153 cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 154 cat0->sync = !!(instr->flags & IR3_INSTR_SY); 155 cat0->opc_cat = 0; 156 157 return 0; 158} 159 160static int emit_cat1(struct ir3_instruction *instr, void *ptr, 161 struct ir3_info *info) 162{ 163 struct ir3_register *dst = instr->regs[0]; 164 struct ir3_register *src = instr->regs[1]; 165 instr_cat1_t *cat1 = ptr; 166 167 iassert(instr->regs_count == 2); 168 iassert_type(dst, type_size(instr->cat1.dst_type) == 32); 169 if (!(src->flags & IR3_REG_IMMED)) 170 iassert_type(src, type_size(instr->cat1.src_type) == 32); 171 172 if (src->flags & IR3_REG_IMMED) { 173 cat1->iim_val = src->iim_val; 174 cat1->src_im = 1; 175 } else if (src->flags & IR3_REG_RELATIV) { 176 cat1->off = reg(src, info, instr->repeat, 177 IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV); 178 cat1->src_rel = 1; 179 cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); 180 } else { 181 cat1->src = reg(src, info, instr->repeat, 182 IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF); 183 cat1->src_c = !!(src->flags & IR3_REG_CONST); 184 } 185 186 cat1->dst = reg(dst, info, instr->repeat, 187 IR3_REG_RELATIV | IR3_REG_EVEN | 188 IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); 189 cat1->repeat = instr->repeat; 190 cat1->src_r = !!(src->flags & IR3_REG_R); 191 cat1->ss = !!(instr->flags & IR3_INSTR_SS); 192 cat1->ul = !!(instr->flags & IR3_INSTR_UL); 193 cat1->dst_type = instr->cat1.dst_type; 194 cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); 195 cat1->src_type = instr->cat1.src_type; 196 cat1->even = !!(dst->flags & IR3_REG_EVEN); 197 cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); 198 cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 199 cat1->sync = !!(instr->flags & IR3_INSTR_SY); 200 cat1->opc_cat = 1; 201 202 return 0; 203} 204 205static int emit_cat2(struct ir3_instruction *instr, void *ptr, 206 struct ir3_info *info) 207{ 208 struct ir3_register *dst = instr->regs[0]; 209 struct ir3_register *src1 = instr->regs[1]; 210 struct ir3_register *src2 = instr->regs[2]; 211 instr_cat2_t *cat2 = ptr; 212 unsigned absneg = ir3_cat2_absneg(instr->opc); 213 214 iassert((instr->regs_count == 2) || (instr->regs_count == 3)); 215 216 if (instr->nop) { 217 iassert(!instr->repeat); 218 iassert(instr->nop <= 3); 219 220 cat2->src1_r = instr->nop & 0x1; 221 cat2->src2_r = (instr->nop >> 1) & 0x1; 222 } else { 223 cat2->src1_r = !!(src1->flags & IR3_REG_R); 224 if (src2) 225 cat2->src2_r = !!(src2->flags & IR3_REG_R); 226 } 227 228 if (src1->flags & IR3_REG_RELATIV) { 229 iassert(src1->array.offset < (1 << 10)); 230 cat2->rel1.src1 = reg(src1, info, instr->repeat, 231 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 232 IR3_REG_HALF | absneg); 233 cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); 234 cat2->rel1.src1_rel = 1; 235 } else if (src1->flags & IR3_REG_CONST) { 236 iassert(src1->num < (1 << 12)); 237 cat2->c1.src1 = reg(src1, info, instr->repeat, 238 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 239 cat2->c1.src1_c = 1; 240 } else { 241 iassert(src1->num < (1 << 11)); 242 cat2->src1 = reg(src1, info, instr->repeat, 243 IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | 244 absneg); 245 } 246 cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); 247 cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 248 cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS)); 249 250 if (src2) { 251 iassert((src2->flags & IR3_REG_IMMED) || 252 !((src1->flags ^ src2->flags) & IR3_REG_HALF)); 253 254 if (src2->flags & IR3_REG_RELATIV) { 255 iassert(src2->array.offset < (1 << 10)); 256 cat2->rel2.src2 = reg(src2, info, instr->repeat, 257 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 258 IR3_REG_HALF | absneg); 259 cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST); 260 cat2->rel2.src2_rel = 1; 261 } else if (src2->flags & IR3_REG_CONST) { 262 iassert(src2->num < (1 << 12)); 263 cat2->c2.src2 = reg(src2, info, instr->repeat, 264 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 265 cat2->c2.src2_c = 1; 266 } else { 267 iassert(src2->num < (1 << 11)); 268 cat2->src2 = reg(src2, info, instr->repeat, 269 IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | 270 absneg); 271 } 272 273 cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); 274 cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 275 cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS)); 276 } 277 278 cat2->dst = reg(dst, info, instr->repeat, 279 IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); 280 cat2->repeat = instr->repeat; 281 cat2->sat = !!(instr->flags & IR3_INSTR_SAT); 282 cat2->ss = !!(instr->flags & IR3_INSTR_SS); 283 cat2->ul = !!(instr->flags & IR3_INSTR_UL); 284 cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); 285 cat2->ei = !!(dst->flags & IR3_REG_EI); 286 cat2->cond = instr->cat2.condition; 287 cat2->full = ! (src1->flags & IR3_REG_HALF); 288 cat2->opc = instr->opc; 289 cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 290 cat2->sync = !!(instr->flags & IR3_INSTR_SY); 291 cat2->opc_cat = 2; 292 293 return 0; 294} 295 296static int emit_cat3(struct ir3_instruction *instr, void *ptr, 297 struct ir3_info *info) 298{ 299 struct ir3_register *dst = instr->regs[0]; 300 struct ir3_register *src1 = instr->regs[1]; 301 struct ir3_register *src2 = instr->regs[2]; 302 struct ir3_register *src3 = instr->regs[3]; 303 unsigned absneg = ir3_cat3_absneg(instr->opc); 304 instr_cat3_t *cat3 = ptr; 305 uint32_t src_flags = 0; 306 307 switch (instr->opc) { 308 case OPC_MAD_F16: 309 case OPC_MAD_U16: 310 case OPC_MAD_S16: 311 case OPC_SEL_B16: 312 case OPC_SEL_S16: 313 case OPC_SEL_F16: 314 case OPC_SAD_S16: 315 case OPC_SAD_S32: // really?? 316 src_flags |= IR3_REG_HALF; 317 break; 318 default: 319 break; 320 } 321 322 iassert(instr->regs_count == 4); 323 iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); 324 iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); 325 iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); 326 327 if (instr->nop) { 328 iassert(!instr->repeat); 329 iassert(instr->nop <= 3); 330 331 cat3->src1_r = instr->nop & 0x1; 332 cat3->src2_r = (instr->nop >> 1) & 0x1; 333 } else { 334 cat3->src1_r = !!(src1->flags & IR3_REG_R); 335 cat3->src2_r = !!(src2->flags & IR3_REG_R); 336 } 337 338 if (src1->flags & IR3_REG_RELATIV) { 339 iassert(src1->array.offset < (1 << 10)); 340 cat3->rel1.src1 = reg(src1, info, instr->repeat, 341 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 342 IR3_REG_HALF | absneg); 343 cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); 344 cat3->rel1.src1_rel = 1; 345 } else if (src1->flags & IR3_REG_CONST) { 346 iassert(src1->num < (1 << 12)); 347 cat3->c1.src1 = reg(src1, info, instr->repeat, 348 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 349 cat3->c1.src1_c = 1; 350 } else { 351 iassert(src1->num < (1 << 11)); 352 cat3->src1 = reg(src1, info, instr->repeat, 353 IR3_REG_R | IR3_REG_HALF | absneg); 354 } 355 356 cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 357 358 cat3->src2 = reg(src2, info, instr->repeat, 359 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg); 360 cat3->src2_c = !!(src2->flags & IR3_REG_CONST); 361 cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 362 363 if (src3->flags & IR3_REG_RELATIV) { 364 iassert(src3->array.offset < (1 << 10)); 365 cat3->rel2.src3 = reg(src3, info, instr->repeat, 366 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 367 IR3_REG_HALF | absneg); 368 cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST); 369 cat3->rel2.src3_rel = 1; 370 } else if (src3->flags & IR3_REG_CONST) { 371 iassert(src3->num < (1 << 12)); 372 cat3->c2.src3 = reg(src3, info, instr->repeat, 373 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 374 cat3->c2.src3_c = 1; 375 } else { 376 iassert(src3->num < (1 << 11)); 377 cat3->src3 = reg(src3, info, instr->repeat, 378 IR3_REG_R | IR3_REG_HALF | absneg); 379 } 380 381 cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 382 cat3->src3_r = !!(src3->flags & IR3_REG_R); 383 384 cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 385 cat3->repeat = instr->repeat; 386 cat3->sat = !!(instr->flags & IR3_INSTR_SAT); 387 cat3->ss = !!(instr->flags & IR3_INSTR_SS); 388 cat3->ul = !!(instr->flags & IR3_INSTR_UL); 389 cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); 390 cat3->opc = instr->opc; 391 cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 392 cat3->sync = !!(instr->flags & IR3_INSTR_SY); 393 cat3->opc_cat = 3; 394 395 return 0; 396} 397 398static int emit_cat4(struct ir3_instruction *instr, void *ptr, 399 struct ir3_info *info) 400{ 401 struct ir3_register *dst = instr->regs[0]; 402 struct ir3_register *src = instr->regs[1]; 403 instr_cat4_t *cat4 = ptr; 404 405 iassert(instr->regs_count == 2); 406 407 if (src->flags & IR3_REG_RELATIV) { 408 iassert(src->array.offset < (1 << 10)); 409 cat4->rel.src = reg(src, info, instr->repeat, 410 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG | 411 IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF); 412 cat4->rel.src_c = !!(src->flags & IR3_REG_CONST); 413 cat4->rel.src_rel = 1; 414 } else if (src->flags & IR3_REG_CONST) { 415 iassert(src->num < (1 << 12)); 416 cat4->c.src = reg(src, info, instr->repeat, 417 IR3_REG_CONST | IR3_REG_FNEG | IR3_REG_FABS | 418 IR3_REG_R | IR3_REG_HALF); 419 cat4->c.src_c = 1; 420 } else { 421 iassert(src->num < (1 << 11)); 422 cat4->src = reg(src, info, instr->repeat, 423 IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS | 424 IR3_REG_R | IR3_REG_HALF); 425 } 426 427 cat4->src_im = !!(src->flags & IR3_REG_IMMED); 428 cat4->src_neg = !!(src->flags & IR3_REG_FNEG); 429 cat4->src_abs = !!(src->flags & IR3_REG_FABS); 430 cat4->src_r = !!(src->flags & IR3_REG_R); 431 432 cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 433 cat4->repeat = instr->repeat; 434 cat4->sat = !!(instr->flags & IR3_INSTR_SAT); 435 cat4->ss = !!(instr->flags & IR3_INSTR_SS); 436 cat4->ul = !!(instr->flags & IR3_INSTR_UL); 437 cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); 438 cat4->full = ! (src->flags & IR3_REG_HALF); 439 cat4->opc = instr->opc; 440 cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 441 cat4->sync = !!(instr->flags & IR3_INSTR_SY); 442 cat4->opc_cat = 4; 443 444 return 0; 445} 446 447static int emit_cat5(struct ir3_instruction *instr, void *ptr, 448 struct ir3_info *info) 449{ 450 struct ir3_register *dst = instr->regs[0]; 451 /* To simplify things when there could be zero, one, or two args other 452 * than tex/sampler idx, we use the first src reg in the ir to hold 453 * samp_tex hvec2: 454 */ 455 struct ir3_register *src1; 456 struct ir3_register *src2; 457 instr_cat5_t *cat5 = ptr; 458 459 iassert((instr->regs_count == 2) || 460 (instr->regs_count == 3) || (instr->regs_count == 4)); 461 462 switch (instr->opc) { 463 case OPC_DSX: 464 case OPC_DSXPP_1: 465 case OPC_DSY: 466 case OPC_DSYPP_1: 467 case OPC_RGETPOS: 468 case OPC_RGETINFO: 469 iassert((instr->flags & IR3_INSTR_S2EN) == 0); 470 src1 = instr->regs[1]; 471 src2 = instr->regs_count > 2 ? instr->regs[2] : NULL; 472 break; 473 default: 474 src1 = instr->regs[2]; 475 src2 = instr->regs_count > 3 ? instr->regs[3] : NULL; 476 break; 477 } 478 479 assume(src1 || !src2); 480 481 if (src1) { 482 cat5->full = ! (src1->flags & IR3_REG_HALF); 483 cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); 484 } 485 486 if (instr->flags & IR3_INSTR_S2EN) { 487 struct ir3_register *samp_tex = instr->regs[1]; 488 if (src2) { 489 iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); 490 cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); 491 } 492 iassert(samp_tex->flags & IR3_REG_HALF); 493 cat5->s2en.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF); 494 iassert(!(instr->cat5.samp | instr->cat5.tex)); 495 } else { 496 if (src2) { 497 iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); 498 cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); 499 } 500 cat5->norm.samp = instr->cat5.samp; 501 cat5->norm.tex = instr->cat5.tex; 502 } 503 504 cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 505 cat5->wrmask = dst->wrmask; 506 cat5->type = instr->cat5.type; 507 cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); 508 cat5->is_a = !!(instr->flags & IR3_INSTR_A); 509 cat5->is_s = !!(instr->flags & IR3_INSTR_S); 510 cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); 511 cat5->is_o = !!(instr->flags & IR3_INSTR_O); 512 cat5->is_p = !!(instr->flags & IR3_INSTR_P); 513 cat5->opc = instr->opc; 514 cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 515 cat5->sync = !!(instr->flags & IR3_INSTR_SY); 516 cat5->opc_cat = 5; 517 518 return 0; 519} 520 521static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr, 522 struct ir3_info *info) 523{ 524 struct ir3_register *src1, *src2; 525 instr_cat6_a6xx_t *cat6 = ptr; 526 bool has_dest = (instr->opc == OPC_LDIB); 527 528 /* first reg should be SSBO binding point: */ 529 iassert(instr->regs[1]->flags & IR3_REG_IMMED); 530 531 src1 = instr->regs[2]; 532 533 if (has_dest) { 534 /* the src2 field in the instruction is actually the destination 535 * register for load instructions: 536 */ 537 src2 = instr->regs[0]; 538 } else { 539 src2 = instr->regs[3]; 540 } 541 542 cat6->type = instr->cat6.type; 543 cat6->d = instr->cat6.d - 1; 544 cat6->typed = instr->cat6.typed; 545 cat6->type_size = instr->cat6.iim_val - 1; 546 cat6->opc = instr->opc; 547 cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 548 cat6->sync = !!(instr->flags & IR3_INSTR_SY); 549 cat6->opc_cat = 6; 550 551 cat6->src1 = reg(src1, info, instr->repeat, 0); 552 cat6->src2 = reg(src2, info, instr->repeat, 0); 553 cat6->ssbo = instr->regs[1]->iim_val; 554 555 switch (instr->opc) { 556 case OPC_ATOMIC_ADD: 557 case OPC_ATOMIC_SUB: 558 case OPC_ATOMIC_XCHG: 559 case OPC_ATOMIC_INC: 560 case OPC_ATOMIC_DEC: 561 case OPC_ATOMIC_CMPXCHG: 562 case OPC_ATOMIC_MIN: 563 case OPC_ATOMIC_MAX: 564 case OPC_ATOMIC_AND: 565 case OPC_ATOMIC_OR: 566 case OPC_ATOMIC_XOR: 567 cat6->pad1 = 0x1; 568 cat6->pad2 = 0xc; 569 cat6->pad3 = 0x0; 570 cat6->pad4 = 0x3; 571 break; 572 case OPC_STIB: 573 cat6->pad1 = 0x0; 574 cat6->pad2 = 0xc; 575 cat6->pad3 = 0x0; 576 cat6->pad4 = 0x2; 577 break; 578 case OPC_LDIB: 579 cat6->pad1 = 0x1; 580 cat6->pad2 = 0xc; 581 cat6->pad3 = 0x0; 582 cat6->pad4 = 0x2; 583 break; 584 case OPC_LDC: 585 cat6->pad1 = 0x0; 586 cat6->pad2 = 0x8; 587 cat6->pad3 = 0x0; 588 cat6->pad4 = 0x2; 589 break; 590 default: 591 iassert(0); 592 } 593 594 return 0; 595} 596 597static int emit_cat6(struct ir3_instruction *instr, void *ptr, 598 struct ir3_info *info) 599{ 600 struct ir3_register *dst, *src1, *src2; 601 instr_cat6_t *cat6 = ptr; 602 603 /* In a6xx we start using a new instruction encoding for some of 604 * these instructions: 605 */ 606 if (info->gpu_id >= 600) { 607 switch (instr->opc) { 608 case OPC_ATOMIC_ADD: 609 case OPC_ATOMIC_SUB: 610 case OPC_ATOMIC_XCHG: 611 case OPC_ATOMIC_INC: 612 case OPC_ATOMIC_DEC: 613 case OPC_ATOMIC_CMPXCHG: 614 case OPC_ATOMIC_MIN: 615 case OPC_ATOMIC_MAX: 616 case OPC_ATOMIC_AND: 617 case OPC_ATOMIC_OR: 618 case OPC_ATOMIC_XOR: 619 /* The shared variants of these still use the old encoding: */ 620 if (!(instr->flags & IR3_INSTR_G)) 621 break; 622 /* fallthrough */ 623 case OPC_STIB: 624 case OPC_LDIB: 625 case OPC_LDC: 626 return emit_cat6_a6xx(instr, ptr, info); 627 default: 628 break; 629 } 630 } 631 632 bool type_full = type_size(instr->cat6.type) == 32; 633 634 cat6->type = instr->cat6.type; 635 cat6->opc = instr->opc; 636 cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 637 cat6->sync = !!(instr->flags & IR3_INSTR_SY); 638 cat6->g = !!(instr->flags & IR3_INSTR_G); 639 cat6->opc_cat = 6; 640 641 switch (instr->opc) { 642 case OPC_RESINFO: 643 case OPC_RESFMT: 644 iassert_type(instr->regs[0], type_full); /* dst */ 645 iassert_type(instr->regs[1], type_full); /* src1 */ 646 break; 647 case OPC_L2G: 648 case OPC_G2L: 649 iassert_type(instr->regs[0], true); /* dst */ 650 iassert_type(instr->regs[1], true); /* src1 */ 651 break; 652 case OPC_STG: 653 case OPC_STL: 654 case OPC_STP: 655 case OPC_STLW: 656 case OPC_STIB: 657 /* no dst, so regs[0] is dummy */ 658 iassert_type(instr->regs[1], true); /* dst */ 659 iassert_type(instr->regs[2], type_full); /* src1 */ 660 iassert_type(instr->regs[3], true); /* src2 */ 661 break; 662 default: 663 iassert_type(instr->regs[0], type_full); /* dst */ 664 iassert_type(instr->regs[1], true); /* src1 */ 665 if (instr->regs_count > 2) 666 iassert_type(instr->regs[2], true); /* src1 */ 667 break; 668 } 669 670 /* the "dst" for a store instruction is (from the perspective 671 * of data flow in the shader, ie. register use/def, etc) in 672 * fact a register that is read by the instruction, rather 673 * than written: 674 */ 675 if (is_store(instr)) { 676 iassert(instr->regs_count >= 3); 677 678 dst = instr->regs[1]; 679 src1 = instr->regs[2]; 680 src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL; 681 } else { 682 iassert(instr->regs_count >= 2); 683 684 dst = instr->regs[0]; 685 src1 = instr->regs[1]; 686 src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL; 687 } 688 689 /* TODO we need a more comprehensive list about which instructions 690 * can be encoded which way. Or possibly use IR3_INSTR_0 flag to 691 * indicate to use the src_off encoding even if offset is zero 692 * (but then what to do about dst_off?) 693 */ 694 if (is_atomic(instr->opc)) { 695 instr_cat6ldgb_t *ldgb = ptr; 696 697 /* maybe these two bits both determine the instruction encoding? */ 698 cat6->src_off = false; 699 700 ldgb->d = instr->cat6.d - 1; 701 ldgb->typed = instr->cat6.typed; 702 ldgb->type_size = instr->cat6.iim_val - 1; 703 704 ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 705 706 if (ldgb->g) { 707 struct ir3_register *src3 = instr->regs[3]; 708 struct ir3_register *src4 = instr->regs[4]; 709 710 /* first src is src_ssbo: */ 711 iassert(src1->flags & IR3_REG_IMMED); 712 ldgb->src_ssbo = src1->uim_val; 713 714 ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 715 ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); 716 ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); 717 ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); 718 719 ldgb->src3 = reg(src4, info, instr->repeat, 0); 720 ldgb->pad0 = 0x1; 721 ldgb->pad3 = 0x1; 722 } else { 723 ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); 724 ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED); 725 ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 726 ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED); 727 ldgb->pad0 = 0x1; 728 ldgb->pad3 = 0x0; 729 } 730 731 return 0; 732 } else if (instr->opc == OPC_LDGB) { 733 struct ir3_register *src3 = instr->regs[3]; 734 instr_cat6ldgb_t *ldgb = ptr; 735 736 /* maybe these two bits both determine the instruction encoding? */ 737 cat6->src_off = false; 738 739 ldgb->d = instr->cat6.d - 1; 740 ldgb->typed = instr->cat6.typed; 741 ldgb->type_size = instr->cat6.iim_val - 1; 742 743 ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 744 745 /* first src is src_ssbo: */ 746 iassert(src1->flags & IR3_REG_IMMED); 747 ldgb->src_ssbo = src1->uim_val; 748 749 /* then next two are src1/src2: */ 750 ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 751 ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); 752 ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); 753 ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); 754 755 ldgb->pad0 = 0x0; 756 ldgb->pad3 = 0x1; 757 758 return 0; 759 } else if (instr->opc == OPC_RESINFO) { 760 instr_cat6ldgb_t *ldgb = ptr; 761 762 ldgb->d = instr->cat6.d - 1; 763 764 ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 765 766 /* first src is src_ssbo: */ 767 iassert(src1->flags & IR3_REG_IMMED); 768 ldgb->src_ssbo = src1->uim_val; 769 770 return 0; 771 } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) { 772 struct ir3_register *src3 = instr->regs[4]; 773 instr_cat6stgb_t *stgb = ptr; 774 775 /* maybe these two bits both determine the instruction encoding? */ 776 cat6->src_off = true; 777 stgb->pad3 = 0x2; 778 779 stgb->d = instr->cat6.d - 1; 780 stgb->typed = instr->cat6.typed; 781 stgb->type_size = instr->cat6.iim_val - 1; 782 783 /* first src is dst_ssbo: */ 784 iassert(dst->flags & IR3_REG_IMMED); 785 stgb->dst_ssbo = dst->uim_val; 786 787 /* then src1/src2/src3: */ 788 stgb->src1 = reg(src1, info, instr->repeat, 0); 789 stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 790 stgb->src2_im = !!(src2->flags & IR3_REG_IMMED); 791 stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED); 792 stgb->src3_im = !!(src3->flags & IR3_REG_IMMED); 793 794 return 0; 795 } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) || 796 (instr->opc == OPC_LDL)) { 797 instr_cat6a_t *cat6a = ptr; 798 799 cat6->src_off = true; 800 801 cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); 802 cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED); 803 if (src2) { 804 cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 805 cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED); 806 } 807 cat6a->off = instr->cat6.src_offset; 808 } else { 809 instr_cat6b_t *cat6b = ptr; 810 811 cat6->src_off = false; 812 813 cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED | IR3_REG_HALF); 814 cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED); 815 if (src2) { 816 cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 817 cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED); 818 } 819 } 820 821 if (instr->cat6.dst_offset || (instr->opc == OPC_STG) || 822 (instr->opc == OPC_STL)) { 823 instr_cat6c_t *cat6c = ptr; 824 cat6->dst_off = true; 825 cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 826 cat6c->off = instr->cat6.dst_offset; 827 } else { 828 instr_cat6d_t *cat6d = ptr; 829 cat6->dst_off = false; 830 cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 831 } 832 833 return 0; 834} 835 836static int emit_cat7(struct ir3_instruction *instr, void *ptr, 837 struct ir3_info *info) 838{ 839 instr_cat7_t *cat7 = ptr; 840 841 cat7->ss = !!(instr->flags & IR3_INSTR_SS); 842 cat7->w = instr->cat7.w; 843 cat7->r = instr->cat7.r; 844 cat7->l = instr->cat7.l; 845 cat7->g = instr->cat7.g; 846 cat7->opc = instr->opc; 847 cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 848 cat7->sync = !!(instr->flags & IR3_INSTR_SY); 849 cat7->opc_cat = 7; 850 851 return 0; 852} 853 854static int (*emit[])(struct ir3_instruction *instr, void *ptr, 855 struct ir3_info *info) = { 856 emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, 857 emit_cat7, 858}; 859 860void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, 861 uint32_t gpu_id) 862{ 863 uint32_t *ptr, *dwords; 864 865 info->gpu_id = gpu_id; 866 info->max_reg = -1; 867 info->max_half_reg = -1; 868 info->max_const = -1; 869 info->instrs_count = 0; 870 info->sizedwords = 0; 871 info->ss = info->sy = 0; 872 873 list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { 874 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { 875 info->sizedwords += 2; 876 } 877 } 878 879 /* need an integer number of instruction "groups" (sets of 16 880 * instructions on a4xx or sets of 4 instructions on a3xx), 881 * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits) 882 */ 883 if (gpu_id >= 400) { 884 info->sizedwords = align(info->sizedwords, 16 * 2); 885 } else { 886 info->sizedwords = align(info->sizedwords, 4 * 2); 887 } 888 889 ptr = dwords = calloc(4, info->sizedwords); 890 891 list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { 892 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { 893 int ret = emit[opc_cat(instr->opc)](instr, dwords, info); 894 if (ret) 895 goto fail; 896 info->instrs_count += 1 + instr->repeat + instr->nop; 897 dwords += 2; 898 899 if (instr->flags & IR3_INSTR_SS) 900 info->ss++; 901 902 if (instr->flags & IR3_INSTR_SY) 903 info->sy++; 904 } 905 } 906 907 return ptr; 908 909fail: 910 free(ptr); 911 return NULL; 912} 913 914static struct ir3_register * reg_create(struct ir3 *shader, 915 int num, int flags) 916{ 917 struct ir3_register *reg = 918 ir3_alloc(shader, sizeof(struct ir3_register)); 919 reg->wrmask = 1; 920 reg->flags = flags; 921 reg->num = num; 922 if (shader->compiler->gpu_id >= 600) 923 reg->merged = true; 924 return reg; 925} 926 927static void insert_instr(struct ir3_block *block, 928 struct ir3_instruction *instr) 929{ 930 struct ir3 *shader = block->shader; 931#ifdef DEBUG 932 instr->serialno = ++shader->instr_count; 933#endif 934 list_addtail(&instr->node, &block->instr_list); 935 936 if (is_input(instr)) 937 array_insert(shader, shader->baryfs, instr); 938} 939 940struct ir3_block * ir3_block_create(struct ir3 *shader) 941{ 942 struct ir3_block *block = ir3_alloc(shader, sizeof(*block)); 943#ifdef DEBUG 944 block->serialno = ++shader->block_count; 945#endif 946 block->shader = shader; 947 list_inithead(&block->node); 948 list_inithead(&block->instr_list); 949 return block; 950} 951 952static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg) 953{ 954 struct ir3_instruction *instr; 955 unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0])); 956 char *ptr = ir3_alloc(block->shader, sz); 957 958 instr = (struct ir3_instruction *)ptr; 959 ptr += sizeof(*instr); 960 instr->regs = (struct ir3_register **)ptr; 961 962#ifdef DEBUG 963 instr->regs_max = nreg; 964#endif 965 966 return instr; 967} 968 969struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, 970 opc_t opc, int nreg) 971{ 972 struct ir3_instruction *instr = instr_create(block, nreg); 973 instr->block = block; 974 instr->opc = opc; 975 insert_instr(block, instr); 976 return instr; 977} 978 979struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc) 980{ 981 /* NOTE: we could be slightly more clever, at least for non-meta, 982 * and choose # of regs based on category. 983 */ 984 return ir3_instr_create2(block, opc, 4); 985} 986 987struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) 988{ 989 struct ir3_instruction *new_instr = instr_create(instr->block, 990 instr->regs_count); 991 struct ir3_register **regs; 992 unsigned i; 993 994 regs = new_instr->regs; 995 *new_instr = *instr; 996 new_instr->regs = regs; 997 998 insert_instr(instr->block, new_instr); 999 1000 /* clone registers: */ 1001 new_instr->regs_count = 0; 1002 for (i = 0; i < instr->regs_count; i++) { 1003 struct ir3_register *reg = instr->regs[i]; 1004 struct ir3_register *new_reg = 1005 ir3_reg_create(new_instr, reg->num, reg->flags); 1006 *new_reg = *reg; 1007 } 1008 1009 return new_instr; 1010} 1011 1012/* Add a false dependency to instruction, to ensure it is scheduled first: */ 1013void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep) 1014{ 1015 array_insert(instr, instr->deps, dep); 1016} 1017 1018struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, 1019 int num, int flags) 1020{ 1021 struct ir3 *shader = instr->block->shader; 1022 struct ir3_register *reg = reg_create(shader, num, flags); 1023#ifdef DEBUG 1024 debug_assert(instr->regs_count < instr->regs_max); 1025#endif 1026 instr->regs[instr->regs_count++] = reg; 1027 return reg; 1028} 1029 1030struct ir3_register * ir3_reg_clone(struct ir3 *shader, 1031 struct ir3_register *reg) 1032{ 1033 struct ir3_register *new_reg = reg_create(shader, 0, 0); 1034 *new_reg = *reg; 1035 return new_reg; 1036} 1037 1038void 1039ir3_instr_set_address(struct ir3_instruction *instr, 1040 struct ir3_instruction *addr) 1041{ 1042 if (instr->address != addr) { 1043 struct ir3 *ir = instr->block->shader; 1044 instr->address = addr; 1045 array_insert(ir, ir->indirects, instr); 1046 } 1047} 1048 1049void 1050ir3_block_clear_mark(struct ir3_block *block) 1051{ 1052 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) 1053 instr->flags &= ~IR3_INSTR_MARK; 1054} 1055 1056void 1057ir3_clear_mark(struct ir3 *ir) 1058{ 1059 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { 1060 ir3_block_clear_mark(block); 1061 } 1062} 1063 1064/* note: this will destroy instr->depth, don't do it until after sched! */ 1065unsigned 1066ir3_count_instructions(struct ir3 *ir) 1067{ 1068 unsigned cnt = 0; 1069 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { 1070 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { 1071 instr->ip = cnt++; 1072 } 1073 block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip; 1074 block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip; 1075 } 1076 return cnt; 1077} 1078 1079struct ir3_array * 1080ir3_lookup_array(struct ir3 *ir, unsigned id) 1081{ 1082 list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) 1083 if (arr->id == id) 1084 return arr; 1085 return NULL; 1086} 1087