1/* 2 * Copyright 2011 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#ifdef XF86DRM_MODE 32 33#include "xf86.h" 34 35#include "cayman_shader.h" 36#include "cayman_reg.h" 37 38/* solid vs --------------------------------------- */ 39int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) 40{ 41 int i = 0; 42 43 /* 0 */ 44 shader[i++] = CF_DWORD0(ADDR(4), 45 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 46 shader[i++] = CF_DWORD1(POP_COUNT(0), 47 CF_CONST(0), 48 COND(SQ_CF_COND_ACTIVE), 49 I_COUNT(1), 50 VALID_PIXEL_MODE(0), 51 CF_INST(SQ_CF_INST_TC), 52 BARRIER(1)); 53 /* 1 */ 54 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 55 TYPE(SQ_EXPORT_POS), 56 RW_GPR(1), 57 RW_REL(ABSOLUTE), 58 INDEX_GPR(0), 59 ELEM_SIZE(0)); 60 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 61 SRC_SEL_Y(SQ_SEL_Y), 62 SRC_SEL_Z(SQ_SEL_Z), 63 SRC_SEL_W(SQ_SEL_W), 64 BURST_COUNT(1), 65 VALID_PIXEL_MODE(0), 66 CF_INST(SQ_CF_INST_EXPORT_DONE), 67 MARK(0), 68 BARRIER(1)); 69 /* 2 - always export a param whether it's used or not */ 70 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 71 TYPE(SQ_EXPORT_PARAM), 72 RW_GPR(0), 73 RW_REL(ABSOLUTE), 74 INDEX_GPR(0), 75 ELEM_SIZE(0)); 76 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 77 SRC_SEL_Y(SQ_SEL_Y), 78 SRC_SEL_Z(SQ_SEL_Z), 79 SRC_SEL_W(SQ_SEL_W), 80 BURST_COUNT(0), 81 VALID_PIXEL_MODE(0), 82 CF_INST(SQ_CF_INST_EXPORT_DONE), 83 MARK(0), 84 BARRIER(0)); 85 /* 3 - end */ 86 shader[i++] = CF_DWORD0(ADDR(0), 87 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 88 shader[i++] = CF_DWORD1(POP_COUNT(0), 89 CF_CONST(0), 90 COND(SQ_CF_COND_ACTIVE), 91 I_COUNT(0), 92 VALID_PIXEL_MODE(0), 93 CF_INST(SQ_CF_INST_END), 94 BARRIER(1)); 95 /* 4/5 */ 96 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 97 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 98 FETCH_WHOLE_QUAD(0), 99 BUFFER_ID(0), 100 SRC_GPR(0), 101 SRC_REL(ABSOLUTE), 102 SRC_SEL_X(SQ_SEL_X), 103 SRC_SEL_Y(SQ_SEL_Y), 104 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 105 LDS_REQ(0), 106 COALESCED_READ(0)); 107 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 108 DST_REL(0), 109 DST_SEL_X(SQ_SEL_X), 110 DST_SEL_Y(SQ_SEL_Y), 111 DST_SEL_Z(SQ_SEL_0), 112 DST_SEL_W(SQ_SEL_1), 113 USE_CONST_FIELDS(0), 114 DATA_FORMAT(FMT_32_32_FLOAT), 115 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 116 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 117 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 118 shader[i++] = VTX_DWORD2(OFFSET(0), 119#if X_BYTE_ORDER == X_BIG_ENDIAN 120 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 121#else 122 ENDIAN_SWAP(ENDIAN_NONE), 123#endif 124 CONST_BUF_NO_STRIDE(0), 125 ALT_CONST(0), 126 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 127 shader[i++] = VTX_DWORD_PAD; 128 129 return i; 130} 131 132/* solid ps --------------------------------------- */ 133int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) 134{ 135 int i = 0; 136 137 /* 0 */ 138 shader[i++] = CF_ALU_DWORD0(ADDR(3), 139 KCACHE_BANK0(0), 140 KCACHE_BANK1(0), 141 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 142 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 143 KCACHE_ADDR0(0), 144 KCACHE_ADDR1(0), 145 I_COUNT(4), 146 ALT_CONST(0), 147 CF_INST(SQ_CF_INST_ALU), 148 WHOLE_QUAD_MODE(0), 149 BARRIER(1)); 150 /* 1 */ 151 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 152 TYPE(SQ_EXPORT_PIXEL), 153 RW_GPR(0), 154 RW_REL(ABSOLUTE), 155 INDEX_GPR(0), 156 ELEM_SIZE(1)); 157 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 158 SRC_SEL_Y(SQ_SEL_Y), 159 SRC_SEL_Z(SQ_SEL_Z), 160 SRC_SEL_W(SQ_SEL_W), 161 BURST_COUNT(1), 162 VALID_PIXEL_MODE(0), 163 CF_INST(SQ_CF_INST_EXPORT_DONE), 164 MARK(0), 165 BARRIER(1)); 166 167 /* 2 - end */ 168 shader[i++] = CF_DWORD0(ADDR(0), 169 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 170 shader[i++] = CF_DWORD1(POP_COUNT(0), 171 CF_CONST(0), 172 COND(SQ_CF_COND_ACTIVE), 173 I_COUNT(0), 174 VALID_PIXEL_MODE(0), 175 CF_INST(SQ_CF_INST_END), 176 BARRIER(1)); 177 /* 3 */ 178 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 179 SRC0_REL(ABSOLUTE), 180 SRC0_ELEM(ELEM_X), 181 SRC0_NEG(0), 182 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 183 SRC1_REL(ABSOLUTE), 184 SRC1_ELEM(ELEM_X), 185 SRC1_NEG(0), 186 INDEX_MODE(SQ_INDEX_AR_X), 187 PRED_SEL(SQ_PRED_SEL_OFF), 188 LAST(0)); 189 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 190 SRC1_ABS(0), 191 UPDATE_EXECUTE_MASK(0), 192 UPDATE_PRED(0), 193 WRITE_MASK(1), 194 OMOD(SQ_ALU_OMOD_OFF), 195 ALU_INST(SQ_OP2_INST_MOV), 196 BANK_SWIZZLE(SQ_ALU_VEC_012), 197 DST_GPR(0), 198 DST_REL(ABSOLUTE), 199 DST_ELEM(ELEM_X), 200 CLAMP(1)); 201 /* 4 */ 202 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 203 SRC0_REL(ABSOLUTE), 204 SRC0_ELEM(ELEM_Y), 205 SRC0_NEG(0), 206 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 207 SRC1_REL(ABSOLUTE), 208 SRC1_ELEM(ELEM_Y), 209 SRC1_NEG(0), 210 INDEX_MODE(SQ_INDEX_AR_X), 211 PRED_SEL(SQ_PRED_SEL_OFF), 212 LAST(0)); 213 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 214 SRC1_ABS(0), 215 UPDATE_EXECUTE_MASK(0), 216 UPDATE_PRED(0), 217 WRITE_MASK(1), 218 OMOD(SQ_ALU_OMOD_OFF), 219 ALU_INST(SQ_OP2_INST_MOV), 220 BANK_SWIZZLE(SQ_ALU_VEC_012), 221 DST_GPR(0), 222 DST_REL(ABSOLUTE), 223 DST_ELEM(ELEM_Y), 224 CLAMP(1)); 225 /* 5 */ 226 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 227 SRC0_REL(ABSOLUTE), 228 SRC0_ELEM(ELEM_Z), 229 SRC0_NEG(0), 230 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 231 SRC1_REL(ABSOLUTE), 232 SRC1_ELEM(ELEM_Z), 233 SRC1_NEG(0), 234 INDEX_MODE(SQ_INDEX_AR_X), 235 PRED_SEL(SQ_PRED_SEL_OFF), 236 LAST(0)); 237 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 238 SRC1_ABS(0), 239 UPDATE_EXECUTE_MASK(0), 240 UPDATE_PRED(0), 241 WRITE_MASK(1), 242 OMOD(SQ_ALU_OMOD_OFF), 243 ALU_INST(SQ_OP2_INST_MOV), 244 BANK_SWIZZLE(SQ_ALU_VEC_012), 245 DST_GPR(0), 246 DST_REL(ABSOLUTE), 247 DST_ELEM(ELEM_Z), 248 CLAMP(1)); 249 /* 6 */ 250 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 251 SRC0_REL(ABSOLUTE), 252 SRC0_ELEM(ELEM_W), 253 SRC0_NEG(0), 254 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 255 SRC1_REL(ABSOLUTE), 256 SRC1_ELEM(ELEM_W), 257 SRC1_NEG(0), 258 INDEX_MODE(SQ_INDEX_AR_X), 259 PRED_SEL(SQ_PRED_SEL_OFF), 260 LAST(1)); 261 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 262 SRC1_ABS(0), 263 UPDATE_EXECUTE_MASK(0), 264 UPDATE_PRED(0), 265 WRITE_MASK(1), 266 OMOD(SQ_ALU_OMOD_OFF), 267 ALU_INST(SQ_OP2_INST_MOV), 268 BANK_SWIZZLE(SQ_ALU_VEC_012), 269 DST_GPR(0), 270 DST_REL(ABSOLUTE), 271 DST_ELEM(ELEM_W), 272 CLAMP(1)); 273 274 return i; 275} 276 277/* copy vs --------------------------------------- */ 278int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) 279{ 280 int i = 0; 281 282 /* 0 */ 283 shader[i++] = CF_DWORD0(ADDR(4), 284 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 285 shader[i++] = CF_DWORD1(POP_COUNT(0), 286 CF_CONST(0), 287 COND(SQ_CF_COND_ACTIVE), 288 I_COUNT(2), 289 VALID_PIXEL_MODE(0), 290 CF_INST(SQ_CF_INST_TC), 291 BARRIER(1)); 292 /* 1 */ 293 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 294 TYPE(SQ_EXPORT_POS), 295 RW_GPR(1), 296 RW_REL(ABSOLUTE), 297 INDEX_GPR(0), 298 ELEM_SIZE(0)); 299 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 300 SRC_SEL_Y(SQ_SEL_Y), 301 SRC_SEL_Z(SQ_SEL_Z), 302 SRC_SEL_W(SQ_SEL_W), 303 BURST_COUNT(0), 304 VALID_PIXEL_MODE(0), 305 CF_INST(SQ_CF_INST_EXPORT_DONE), 306 MARK(0), 307 BARRIER(1)); 308 /* 2 */ 309 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 310 TYPE(SQ_EXPORT_PARAM), 311 RW_GPR(0), 312 RW_REL(ABSOLUTE), 313 INDEX_GPR(0), 314 ELEM_SIZE(0)); 315 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 316 SRC_SEL_Y(SQ_SEL_Y), 317 SRC_SEL_Z(SQ_SEL_Z), 318 SRC_SEL_W(SQ_SEL_W), 319 BURST_COUNT(0), 320 VALID_PIXEL_MODE(0), 321 CF_INST(SQ_CF_INST_EXPORT_DONE), 322 MARK(0), 323 BARRIER(0)); 324 /* 3 - end */ 325 shader[i++] = CF_DWORD0(ADDR(0), 326 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 327 shader[i++] = CF_DWORD1(POP_COUNT(0), 328 CF_CONST(0), 329 COND(SQ_CF_COND_ACTIVE), 330 I_COUNT(0), 331 VALID_PIXEL_MODE(0), 332 CF_INST(SQ_CF_INST_END), 333 BARRIER(1)); 334 /* 4/5 */ 335 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 336 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 337 FETCH_WHOLE_QUAD(0), 338 BUFFER_ID(0), 339 SRC_GPR(0), 340 SRC_REL(ABSOLUTE), 341 SRC_SEL_X(SQ_SEL_X), 342 SRC_SEL_Y(SQ_SEL_Y), 343 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 344 LDS_REQ(0), 345 COALESCED_READ(0)); 346 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 347 DST_REL(0), 348 DST_SEL_X(SQ_SEL_X), 349 DST_SEL_Y(SQ_SEL_Y), 350 DST_SEL_Z(SQ_SEL_0), 351 DST_SEL_W(SQ_SEL_1), 352 USE_CONST_FIELDS(0), 353 DATA_FORMAT(FMT_32_32_FLOAT), 354 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 355 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 356 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 357 shader[i++] = VTX_DWORD2(OFFSET(0), 358#if X_BYTE_ORDER == X_BIG_ENDIAN 359 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 360#else 361 ENDIAN_SWAP(ENDIAN_NONE), 362#endif 363 CONST_BUF_NO_STRIDE(0), 364 ALT_CONST(0), 365 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 366 shader[i++] = VTX_DWORD_PAD; 367 /* 6/7 */ 368 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 369 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 370 FETCH_WHOLE_QUAD(0), 371 BUFFER_ID(0), 372 SRC_GPR(0), 373 SRC_REL(ABSOLUTE), 374 SRC_SEL_X(SQ_SEL_X), 375 SRC_SEL_Y(SQ_SEL_Y), 376 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 377 LDS_REQ(0), 378 COALESCED_READ(0)); 379 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 380 DST_REL(0), 381 DST_SEL_X(SQ_SEL_X), 382 DST_SEL_Y(SQ_SEL_Y), 383 DST_SEL_Z(SQ_SEL_0), 384 DST_SEL_W(SQ_SEL_1), 385 USE_CONST_FIELDS(0), 386 DATA_FORMAT(FMT_32_32_FLOAT), 387 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 388 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 389 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 390 shader[i++] = VTX_DWORD2(OFFSET(8), 391#if X_BYTE_ORDER == X_BIG_ENDIAN 392 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 393#else 394 ENDIAN_SWAP(ENDIAN_NONE), 395#endif 396 CONST_BUF_NO_STRIDE(0), 397 ALT_CONST(0), 398 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 399 shader[i++] = VTX_DWORD_PAD; 400 401 return i; 402} 403 404/* copy ps --------------------------------------- */ 405int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) 406{ 407 int i = 0; 408 409 /* CF INST 0 */ 410 shader[i++] = CF_ALU_DWORD0(ADDR(4), 411 KCACHE_BANK0(0), 412 KCACHE_BANK1(0), 413 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 414 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 415 KCACHE_ADDR0(0), 416 KCACHE_ADDR1(0), 417 I_COUNT(4), 418 ALT_CONST(0), 419 CF_INST(SQ_CF_INST_ALU), 420 WHOLE_QUAD_MODE(0), 421 BARRIER(1)); 422 /* CF INST 1 */ 423 shader[i++] = CF_DWORD0(ADDR(8), 424 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 425 shader[i++] = CF_DWORD1(POP_COUNT(0), 426 CF_CONST(0), 427 COND(SQ_CF_COND_ACTIVE), 428 I_COUNT(1), 429 VALID_PIXEL_MODE(0), 430 CF_INST(SQ_CF_INST_TC), 431 BARRIER(1)); 432 /* CF INST 2 */ 433 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 434 TYPE(SQ_EXPORT_PIXEL), 435 RW_GPR(0), 436 RW_REL(ABSOLUTE), 437 INDEX_GPR(0), 438 ELEM_SIZE(1)); 439 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 440 SRC_SEL_Y(SQ_SEL_Y), 441 SRC_SEL_Z(SQ_SEL_Z), 442 SRC_SEL_W(SQ_SEL_W), 443 BURST_COUNT(1), 444 VALID_PIXEL_MODE(0), 445 CF_INST(SQ_CF_INST_EXPORT_DONE), 446 MARK(0), 447 BARRIER(1)); 448 /* CF INST 3 - end */ 449 shader[i++] = CF_DWORD0(ADDR(0), 450 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 451 shader[i++] = CF_DWORD1(POP_COUNT(0), 452 CF_CONST(0), 453 COND(SQ_CF_COND_ACTIVE), 454 I_COUNT(0), 455 VALID_PIXEL_MODE(0), 456 CF_INST(SQ_CF_INST_END), 457 BARRIER(1)); 458 /* 4 interpolate tex coords */ 459 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 460 SRC0_REL(ABSOLUTE), 461 SRC0_ELEM(ELEM_Y), 462 SRC0_NEG(0), 463 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 464 SRC1_REL(ABSOLUTE), 465 SRC1_ELEM(ELEM_X), 466 SRC1_NEG(0), 467 INDEX_MODE(SQ_INDEX_AR_X), 468 PRED_SEL(SQ_PRED_SEL_OFF), 469 LAST(0)); 470 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 471 SRC1_ABS(0), 472 UPDATE_EXECUTE_MASK(0), 473 UPDATE_PRED(0), 474 WRITE_MASK(1), 475 OMOD(SQ_ALU_OMOD_OFF), 476 ALU_INST(SQ_OP2_INST_INTERP_XY), 477 BANK_SWIZZLE(SQ_ALU_VEC_210), 478 DST_GPR(0), 479 DST_REL(ABSOLUTE), 480 DST_ELEM(ELEM_X), 481 CLAMP(0)); 482 /* 5 */ 483 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 484 SRC0_REL(ABSOLUTE), 485 SRC0_ELEM(ELEM_X), 486 SRC0_NEG(0), 487 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 488 SRC1_REL(ABSOLUTE), 489 SRC1_ELEM(ELEM_X), 490 SRC1_NEG(0), 491 INDEX_MODE(SQ_INDEX_AR_X), 492 PRED_SEL(SQ_PRED_SEL_OFF), 493 LAST(0)); 494 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 495 SRC1_ABS(0), 496 UPDATE_EXECUTE_MASK(0), 497 UPDATE_PRED(0), 498 WRITE_MASK(1), 499 OMOD(SQ_ALU_OMOD_OFF), 500 ALU_INST(SQ_OP2_INST_INTERP_XY), 501 BANK_SWIZZLE(SQ_ALU_VEC_210), 502 DST_GPR(0), 503 DST_REL(ABSOLUTE), 504 DST_ELEM(ELEM_Y), 505 CLAMP(0)); 506 /* 6 */ 507 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 508 SRC0_REL(ABSOLUTE), 509 SRC0_ELEM(ELEM_Y), 510 SRC0_NEG(0), 511 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 512 SRC1_REL(ABSOLUTE), 513 SRC1_ELEM(ELEM_X), 514 SRC1_NEG(0), 515 INDEX_MODE(SQ_INDEX_AR_X), 516 PRED_SEL(SQ_PRED_SEL_OFF), 517 LAST(0)); 518 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 519 SRC1_ABS(0), 520 UPDATE_EXECUTE_MASK(0), 521 UPDATE_PRED(0), 522 WRITE_MASK(0), 523 OMOD(SQ_ALU_OMOD_OFF), 524 ALU_INST(SQ_OP2_INST_INTERP_XY), 525 BANK_SWIZZLE(SQ_ALU_VEC_210), 526 DST_GPR(0), 527 DST_REL(ABSOLUTE), 528 DST_ELEM(ELEM_Z), 529 CLAMP(0)); 530 /* 7 */ 531 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 532 SRC0_REL(ABSOLUTE), 533 SRC0_ELEM(ELEM_X), 534 SRC0_NEG(0), 535 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 536 SRC1_REL(ABSOLUTE), 537 SRC1_ELEM(ELEM_X), 538 SRC1_NEG(0), 539 INDEX_MODE(SQ_INDEX_AR_X), 540 PRED_SEL(SQ_PRED_SEL_OFF), 541 LAST(1)); 542 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 543 SRC1_ABS(0), 544 UPDATE_EXECUTE_MASK(0), 545 UPDATE_PRED(0), 546 WRITE_MASK(0), 547 OMOD(SQ_ALU_OMOD_OFF), 548 ALU_INST(SQ_OP2_INST_INTERP_XY), 549 BANK_SWIZZLE(SQ_ALU_VEC_210), 550 DST_GPR(0), 551 DST_REL(ABSOLUTE), 552 DST_ELEM(ELEM_W), 553 CLAMP(0)); 554 555 /* 8/9 TEX INST 0 */ 556 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 557 INST_MOD(0), 558 FETCH_WHOLE_QUAD(0), 559 RESOURCE_ID(0), 560 SRC_GPR(0), 561 SRC_REL(ABSOLUTE), 562 ALT_CONST(0), 563 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 564 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 565 shader[i++] = TEX_DWORD1(DST_GPR(0), 566 DST_REL(ABSOLUTE), 567 DST_SEL_X(SQ_SEL_X), /* R */ 568 DST_SEL_Y(SQ_SEL_Y), /* G */ 569 DST_SEL_Z(SQ_SEL_Z), /* B */ 570 DST_SEL_W(SQ_SEL_W), /* A */ 571 LOD_BIAS(0), 572 COORD_TYPE_X(TEX_UNNORMALIZED), 573 COORD_TYPE_Y(TEX_UNNORMALIZED), 574 COORD_TYPE_Z(TEX_UNNORMALIZED), 575 COORD_TYPE_W(TEX_UNNORMALIZED)); 576 shader[i++] = TEX_DWORD2(OFFSET_X(0), 577 OFFSET_Y(0), 578 OFFSET_Z(0), 579 SAMPLER_ID(0), 580 SRC_SEL_X(SQ_SEL_X), 581 SRC_SEL_Y(SQ_SEL_Y), 582 SRC_SEL_Z(SQ_SEL_0), 583 SRC_SEL_W(SQ_SEL_1)); 584 shader[i++] = TEX_DWORD_PAD; 585 586 return i; 587} 588 589int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) 590{ 591 int i = 0; 592 593 /* 0 */ 594 shader[i++] = CF_DWORD0(ADDR(8), 595 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 596 shader[i++] = CF_DWORD1(POP_COUNT(0), 597 CF_CONST(0), 598 COND(SQ_CF_COND_ACTIVE), 599 I_COUNT(2), 600 VALID_PIXEL_MODE(0), 601 CF_INST(SQ_CF_INST_TC), 602 BARRIER(1)); 603 604 /* 1 - ALU */ 605 shader[i++] = CF_ALU_DWORD0(ADDR(5), 606 KCACHE_BANK0(0), 607 KCACHE_BANK1(0), 608 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 609 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 610 KCACHE_ADDR0(0), 611 KCACHE_ADDR1(0), 612 I_COUNT(2), 613 ALT_CONST(0), 614 CF_INST(SQ_CF_INST_ALU), 615 WHOLE_QUAD_MODE(0), 616 BARRIER(1)); 617 618 /* 2 */ 619 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 620 TYPE(SQ_EXPORT_POS), 621 RW_GPR(1), 622 RW_REL(ABSOLUTE), 623 INDEX_GPR(0), 624 ELEM_SIZE(3)); 625 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 626 SRC_SEL_Y(SQ_SEL_Y), 627 SRC_SEL_Z(SQ_SEL_Z), 628 SRC_SEL_W(SQ_SEL_W), 629 BURST_COUNT(1), 630 VALID_PIXEL_MODE(0), 631 CF_INST(SQ_CF_INST_EXPORT_DONE), 632 MARK(0), 633 BARRIER(1)); 634 /* 3 */ 635 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 636 TYPE(SQ_EXPORT_PARAM), 637 RW_GPR(0), 638 RW_REL(ABSOLUTE), 639 INDEX_GPR(0), 640 ELEM_SIZE(3)); 641 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 642 SRC_SEL_Y(SQ_SEL_Y), 643 SRC_SEL_Z(SQ_SEL_Z), 644 SRC_SEL_W(SQ_SEL_W), 645 BURST_COUNT(1), 646 VALID_PIXEL_MODE(0), 647 CF_INST(SQ_CF_INST_EXPORT_DONE), 648 MARK(0), 649 BARRIER(0)); 650 /* 4 - end */ 651 shader[i++] = CF_DWORD0(ADDR(0), 652 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 653 shader[i++] = CF_DWORD1(POP_COUNT(0), 654 CF_CONST(0), 655 COND(SQ_CF_COND_ACTIVE), 656 I_COUNT(0), 657 VALID_PIXEL_MODE(0), 658 CF_INST(SQ_CF_INST_END), 659 BARRIER(1)); 660 /* 5 texX / w */ 661 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 662 SRC0_REL(ABSOLUTE), 663 SRC0_ELEM(ELEM_X), 664 SRC0_NEG(0), 665 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 666 SRC1_REL(ABSOLUTE), 667 SRC1_ELEM(ELEM_X), 668 SRC1_NEG(0), 669 INDEX_MODE(SQ_INDEX_AR_X), 670 PRED_SEL(SQ_PRED_SEL_OFF), 671 LAST(0)); 672 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 673 SRC1_ABS(0), 674 UPDATE_EXECUTE_MASK(0), 675 UPDATE_PRED(0), 676 WRITE_MASK(1), 677 OMOD(SQ_ALU_OMOD_OFF), 678 ALU_INST(SQ_OP2_INST_MUL), 679 BANK_SWIZZLE(SQ_ALU_VEC_012), 680 DST_GPR(0), 681 DST_REL(ABSOLUTE), 682 DST_ELEM(ELEM_X), 683 CLAMP(0)); 684 685 /* 6 texY / h */ 686 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 687 SRC0_REL(ABSOLUTE), 688 SRC0_ELEM(ELEM_Y), 689 SRC0_NEG(0), 690 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 691 SRC1_REL(ABSOLUTE), 692 SRC1_ELEM(ELEM_Y), 693 SRC1_NEG(0), 694 INDEX_MODE(SQ_INDEX_AR_X), 695 PRED_SEL(SQ_PRED_SEL_OFF), 696 LAST(1)); 697 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 698 SRC1_ABS(0), 699 UPDATE_EXECUTE_MASK(0), 700 UPDATE_PRED(0), 701 WRITE_MASK(1), 702 OMOD(SQ_ALU_OMOD_OFF), 703 ALU_INST(SQ_OP2_INST_MUL), 704 BANK_SWIZZLE(SQ_ALU_VEC_012), 705 DST_GPR(0), 706 DST_REL(ABSOLUTE), 707 DST_ELEM(ELEM_Y), 708 CLAMP(0)); 709 710 /* 7 - padding */ 711 shader[i++] = 0x00000000; 712 shader[i++] = 0x00000000; 713 /* 8/9 */ 714 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 715 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 716 FETCH_WHOLE_QUAD(0), 717 BUFFER_ID(0), 718 SRC_GPR(0), 719 SRC_REL(ABSOLUTE), 720 SRC_SEL_X(SQ_SEL_X), 721 SRC_SEL_Y(SQ_SEL_Y), 722 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 723 LDS_REQ(0), 724 COALESCED_READ(0)); 725 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 726 DST_REL(ABSOLUTE), 727 DST_SEL_X(SQ_SEL_X), 728 DST_SEL_Y(SQ_SEL_Y), 729 DST_SEL_Z(SQ_SEL_0), 730 DST_SEL_W(SQ_SEL_1), 731 USE_CONST_FIELDS(0), 732 DATA_FORMAT(FMT_32_32_FLOAT), 733 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 734 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 735 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 736 shader[i++] = VTX_DWORD2(OFFSET(0), 737#if X_BYTE_ORDER == X_BIG_ENDIAN 738 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 739#else 740 ENDIAN_SWAP(ENDIAN_NONE), 741#endif 742 CONST_BUF_NO_STRIDE(0), 743 ALT_CONST(0), 744 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 745 shader[i++] = VTX_DWORD_PAD; 746 /* 10/11 */ 747 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 748 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 749 FETCH_WHOLE_QUAD(0), 750 BUFFER_ID(0), 751 SRC_GPR(0), 752 SRC_REL(ABSOLUTE), 753 SRC_SEL_X(SQ_SEL_X), 754 SRC_SEL_Y(SQ_SEL_Y), 755 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 756 LDS_REQ(0), 757 COALESCED_READ(0)); 758 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 759 DST_REL(ABSOLUTE), 760 DST_SEL_X(SQ_SEL_X), 761 DST_SEL_Y(SQ_SEL_Y), 762 DST_SEL_Z(SQ_SEL_0), 763 DST_SEL_W(SQ_SEL_1), 764 USE_CONST_FIELDS(0), 765 DATA_FORMAT(FMT_32_32_FLOAT), 766 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 767 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 768 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 769 shader[i++] = VTX_DWORD2(OFFSET(8), 770#if X_BYTE_ORDER == X_BIG_ENDIAN 771 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 772#else 773 ENDIAN_SWAP(ENDIAN_NONE), 774#endif 775 CONST_BUF_NO_STRIDE(0), 776 ALT_CONST(0), 777 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 778 shader[i++] = VTX_DWORD_PAD; 779 780 return i; 781} 782 783int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) 784{ 785 int i = 0; 786 787 /* 0 */ 788 shader[i++] = CF_ALU_DWORD0(ADDR(6), 789 KCACHE_BANK0(0), 790 KCACHE_BANK1(0), 791 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 792 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 793 KCACHE_ADDR0(0), 794 KCACHE_ADDR1(0), 795 I_COUNT(4), 796 ALT_CONST(0), 797 CF_INST(SQ_CF_INST_ALU), 798 WHOLE_QUAD_MODE(0), 799 BARRIER(1)); 800 /* 1 */ 801 shader[i++] = CF_DWORD0(ADDR(22), 802 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 803 shader[i++] = CF_DWORD1(POP_COUNT(0), 804 CF_CONST(0), 805 COND(SQ_CF_COND_BOOL), 806 I_COUNT(0), 807 VALID_PIXEL_MODE(0), 808 CF_INST(SQ_CF_INST_CALL), 809 BARRIER(0)); 810 /* 2 */ 811 shader[i++] = CF_DWORD0(ADDR(30), 812 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 813 shader[i++] = CF_DWORD1(POP_COUNT(0), 814 CF_CONST(0), 815 COND(SQ_CF_COND_NOT_BOOL), 816 I_COUNT(0), 817 VALID_PIXEL_MODE(0), 818 CF_INST(SQ_CF_INST_CALL), 819 BARRIER(0)); 820 /* 3 */ 821 shader[i++] = CF_ALU_DWORD0(ADDR(10), 822 KCACHE_BANK0(0), 823 KCACHE_BANK1(0), 824 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 825 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 826 KCACHE_ADDR0(0), 827 KCACHE_ADDR1(0), 828 I_COUNT(12), 829 ALT_CONST(0), 830 CF_INST(SQ_CF_INST_ALU), 831 WHOLE_QUAD_MODE(0), 832 BARRIER(1)); 833 /* 4 */ 834 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 835 TYPE(SQ_EXPORT_PIXEL), 836 RW_GPR(2), 837 RW_REL(ABSOLUTE), 838 INDEX_GPR(0), 839 ELEM_SIZE(3)); 840 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 841 SRC_SEL_Y(SQ_SEL_Y), 842 SRC_SEL_Z(SQ_SEL_Z), 843 SRC_SEL_W(SQ_SEL_W), 844 BURST_COUNT(1), 845 VALID_PIXEL_MODE(0), 846 CF_INST(SQ_CF_INST_EXPORT_DONE), 847 MARK(0), 848 BARRIER(1)); 849 /* 5 - end */ 850 shader[i++] = CF_DWORD0(ADDR(0), 851 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 852 shader[i++] = CF_DWORD1(POP_COUNT(0), 853 CF_CONST(0), 854 COND(SQ_CF_COND_ACTIVE), 855 I_COUNT(0), 856 VALID_PIXEL_MODE(0), 857 CF_INST(SQ_CF_INST_END), 858 BARRIER(1)); 859 /* 6 interpolate tex coords */ 860 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 861 SRC0_REL(ABSOLUTE), 862 SRC0_ELEM(ELEM_Y), 863 SRC0_NEG(0), 864 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 865 SRC1_REL(ABSOLUTE), 866 SRC1_ELEM(ELEM_X), 867 SRC1_NEG(0), 868 INDEX_MODE(SQ_INDEX_AR_X), 869 PRED_SEL(SQ_PRED_SEL_OFF), 870 LAST(0)); 871 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 872 SRC1_ABS(0), 873 UPDATE_EXECUTE_MASK(0), 874 UPDATE_PRED(0), 875 WRITE_MASK(1), 876 OMOD(SQ_ALU_OMOD_OFF), 877 ALU_INST(SQ_OP2_INST_INTERP_XY), 878 BANK_SWIZZLE(SQ_ALU_VEC_210), 879 DST_GPR(0), 880 DST_REL(ABSOLUTE), 881 DST_ELEM(ELEM_X), 882 CLAMP(0)); 883 /* 7 */ 884 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 885 SRC0_REL(ABSOLUTE), 886 SRC0_ELEM(ELEM_X), 887 SRC0_NEG(0), 888 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 889 SRC1_REL(ABSOLUTE), 890 SRC1_ELEM(ELEM_X), 891 SRC1_NEG(0), 892 INDEX_MODE(SQ_INDEX_AR_X), 893 PRED_SEL(SQ_PRED_SEL_OFF), 894 LAST(0)); 895 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 896 SRC1_ABS(0), 897 UPDATE_EXECUTE_MASK(0), 898 UPDATE_PRED(0), 899 WRITE_MASK(1), 900 OMOD(SQ_ALU_OMOD_OFF), 901 ALU_INST(SQ_OP2_INST_INTERP_XY), 902 BANK_SWIZZLE(SQ_ALU_VEC_210), 903 DST_GPR(0), 904 DST_REL(ABSOLUTE), 905 DST_ELEM(ELEM_Y), 906 CLAMP(0)); 907 /* 8 */ 908 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 909 SRC0_REL(ABSOLUTE), 910 SRC0_ELEM(ELEM_Y), 911 SRC0_NEG(0), 912 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 913 SRC1_REL(ABSOLUTE), 914 SRC1_ELEM(ELEM_X), 915 SRC1_NEG(0), 916 INDEX_MODE(SQ_INDEX_AR_X), 917 PRED_SEL(SQ_PRED_SEL_OFF), 918 LAST(0)); 919 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 920 SRC1_ABS(0), 921 UPDATE_EXECUTE_MASK(0), 922 UPDATE_PRED(0), 923 WRITE_MASK(0), 924 OMOD(SQ_ALU_OMOD_OFF), 925 ALU_INST(SQ_OP2_INST_INTERP_XY), 926 BANK_SWIZZLE(SQ_ALU_VEC_210), 927 DST_GPR(0), 928 DST_REL(ABSOLUTE), 929 DST_ELEM(ELEM_Z), 930 CLAMP(0)); 931 /* 9 */ 932 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 933 SRC0_REL(ABSOLUTE), 934 SRC0_ELEM(ELEM_X), 935 SRC0_NEG(0), 936 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 937 SRC1_REL(ABSOLUTE), 938 SRC1_ELEM(ELEM_X), 939 SRC1_NEG(0), 940 INDEX_MODE(SQ_INDEX_AR_X), 941 PRED_SEL(SQ_PRED_SEL_OFF), 942 LAST(1)); 943 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 944 SRC1_ABS(0), 945 UPDATE_EXECUTE_MASK(0), 946 UPDATE_PRED(0), 947 WRITE_MASK(0), 948 OMOD(SQ_ALU_OMOD_OFF), 949 ALU_INST(SQ_OP2_INST_INTERP_XY), 950 BANK_SWIZZLE(SQ_ALU_VEC_210), 951 DST_GPR(0), 952 DST_REL(ABSOLUTE), 953 DST_ELEM(ELEM_W), 954 CLAMP(0)); 955 956 /* 10,11,12,13 */ 957 /* r2.x = MAD(c0.w, r1.x, c0.x) */ 958 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 959 SRC0_REL(ABSOLUTE), 960 SRC0_ELEM(ELEM_W), 961 SRC0_NEG(0), 962 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 963 SRC1_REL(ABSOLUTE), 964 SRC1_ELEM(ELEM_X), 965 SRC1_NEG(0), 966 INDEX_MODE(SQ_INDEX_LOOP), 967 PRED_SEL(SQ_PRED_SEL_OFF), 968 LAST(0)); 969 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 970 SRC2_REL(ABSOLUTE), 971 SRC2_ELEM(ELEM_X), 972 SRC2_NEG(0), 973 ALU_INST(SQ_OP3_INST_MULADD), 974 BANK_SWIZZLE(SQ_ALU_VEC_012), 975 DST_GPR(2), 976 DST_REL(ABSOLUTE), 977 DST_ELEM(ELEM_X), 978 CLAMP(0)); 979 /* r2.y = MAD(c0.w, r1.x, c0.y) */ 980 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 981 SRC0_REL(ABSOLUTE), 982 SRC0_ELEM(ELEM_W), 983 SRC0_NEG(0), 984 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 985 SRC1_REL(ABSOLUTE), 986 SRC1_ELEM(ELEM_X), 987 SRC1_NEG(0), 988 INDEX_MODE(SQ_INDEX_LOOP), 989 PRED_SEL(SQ_PRED_SEL_OFF), 990 LAST(0)); 991 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 992 SRC2_REL(ABSOLUTE), 993 SRC2_ELEM(ELEM_Y), 994 SRC2_NEG(0), 995 ALU_INST(SQ_OP3_INST_MULADD), 996 BANK_SWIZZLE(SQ_ALU_VEC_012), 997 DST_GPR(2), 998 DST_REL(ABSOLUTE), 999 DST_ELEM(ELEM_Y), 1000 CLAMP(0)); 1001 /* r2.z = MAD(c0.w, r1.x, c0.z) */ 1002 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 1003 SRC0_REL(ABSOLUTE), 1004 SRC0_ELEM(ELEM_W), 1005 SRC0_NEG(0), 1006 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1007 SRC1_REL(ABSOLUTE), 1008 SRC1_ELEM(ELEM_X), 1009 SRC1_NEG(0), 1010 INDEX_MODE(SQ_INDEX_LOOP), 1011 PRED_SEL(SQ_PRED_SEL_OFF), 1012 LAST(0)); 1013 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 1014 SRC2_REL(ABSOLUTE), 1015 SRC2_ELEM(ELEM_Z), 1016 SRC2_NEG(0), 1017 ALU_INST(SQ_OP3_INST_MULADD), 1018 BANK_SWIZZLE(SQ_ALU_VEC_012), 1019 DST_GPR(2), 1020 DST_REL(ABSOLUTE), 1021 DST_ELEM(ELEM_Z), 1022 CLAMP(0)); 1023 /* r2.w = MAD(0, 0, 1) */ 1024 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1025 SRC0_REL(ABSOLUTE), 1026 SRC0_ELEM(ELEM_X), 1027 SRC0_NEG(0), 1028 SRC1_SEL(SQ_ALU_SRC_0), 1029 SRC1_REL(ABSOLUTE), 1030 SRC1_ELEM(ELEM_X), 1031 SRC1_NEG(0), 1032 INDEX_MODE(SQ_INDEX_LOOP), 1033 PRED_SEL(SQ_PRED_SEL_OFF), 1034 LAST(1)); 1035 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1036 SRC2_REL(ABSOLUTE), 1037 SRC2_ELEM(ELEM_X), 1038 SRC2_NEG(0), 1039 ALU_INST(SQ_OP3_INST_MULADD), 1040 BANK_SWIZZLE(SQ_ALU_VEC_012), 1041 DST_GPR(2), 1042 DST_REL(ABSOLUTE), 1043 DST_ELEM(ELEM_W), 1044 CLAMP(0)); 1045 1046 /* 14,15,16,17 */ 1047 /* r2.x = MAD(c1.x, r1.y, pv.x) */ 1048 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1049 SRC0_REL(ABSOLUTE), 1050 SRC0_ELEM(ELEM_X), 1051 SRC0_NEG(0), 1052 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1053 SRC1_REL(ABSOLUTE), 1054 SRC1_ELEM(ELEM_Y), 1055 SRC1_NEG(0), 1056 INDEX_MODE(SQ_INDEX_LOOP), 1057 PRED_SEL(SQ_PRED_SEL_OFF), 1058 LAST(0)); 1059 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1060 SRC2_REL(ABSOLUTE), 1061 SRC2_ELEM(ELEM_X), 1062 SRC2_NEG(0), 1063 ALU_INST(SQ_OP3_INST_MULADD), 1064 BANK_SWIZZLE(SQ_ALU_VEC_012), 1065 DST_GPR(2), 1066 DST_REL(ABSOLUTE), 1067 DST_ELEM(ELEM_X), 1068 CLAMP(0)); 1069 /* r2.y = MAD(c1.y, r1.y, pv.y) */ 1070 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1071 SRC0_REL(ABSOLUTE), 1072 SRC0_ELEM(ELEM_Y), 1073 SRC0_NEG(0), 1074 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1075 SRC1_REL(ABSOLUTE), 1076 SRC1_ELEM(ELEM_Y), 1077 SRC1_NEG(0), 1078 INDEX_MODE(SQ_INDEX_LOOP), 1079 PRED_SEL(SQ_PRED_SEL_OFF), 1080 LAST(0)); 1081 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1082 SRC2_REL(ABSOLUTE), 1083 SRC2_ELEM(ELEM_Y), 1084 SRC2_NEG(0), 1085 ALU_INST(SQ_OP3_INST_MULADD), 1086 BANK_SWIZZLE(SQ_ALU_VEC_012), 1087 DST_GPR(2), 1088 DST_REL(ABSOLUTE), 1089 DST_ELEM(ELEM_Y), 1090 CLAMP(0)); 1091 /* r2.z = MAD(c1.z, r1.y, pv.z) */ 1092 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1093 SRC0_REL(ABSOLUTE), 1094 SRC0_ELEM(ELEM_Z), 1095 SRC0_NEG(0), 1096 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1097 SRC1_REL(ABSOLUTE), 1098 SRC1_ELEM(ELEM_Y), 1099 SRC1_NEG(0), 1100 INDEX_MODE(SQ_INDEX_LOOP), 1101 PRED_SEL(SQ_PRED_SEL_OFF), 1102 LAST(0)); 1103 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1104 SRC2_REL(ABSOLUTE), 1105 SRC2_ELEM(ELEM_Z), 1106 SRC2_NEG(0), 1107 ALU_INST(SQ_OP3_INST_MULADD), 1108 BANK_SWIZZLE(SQ_ALU_VEC_012), 1109 DST_GPR(2), 1110 DST_REL(ABSOLUTE), 1111 DST_ELEM(ELEM_Z), 1112 CLAMP(0)); 1113 /* r2.w = MAD(0, 0, 1) */ 1114 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1115 SRC0_REL(ABSOLUTE), 1116 SRC0_ELEM(ELEM_X), 1117 SRC0_NEG(0), 1118 SRC1_SEL(SQ_ALU_SRC_0), 1119 SRC1_REL(ABSOLUTE), 1120 SRC1_ELEM(ELEM_X), 1121 SRC1_NEG(0), 1122 INDEX_MODE(SQ_INDEX_LOOP), 1123 PRED_SEL(SQ_PRED_SEL_OFF), 1124 LAST(1)); 1125 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1126 SRC2_REL(ABSOLUTE), 1127 SRC2_ELEM(ELEM_W), 1128 SRC2_NEG(0), 1129 ALU_INST(SQ_OP3_INST_MULADD), 1130 BANK_SWIZZLE(SQ_ALU_VEC_012), 1131 DST_GPR(2), 1132 DST_REL(ABSOLUTE), 1133 DST_ELEM(ELEM_W), 1134 CLAMP(0)); 1135 /* 18,19,20,21 */ 1136 /* r2.x = MAD(c2.x, r1.z, pv.x) */ 1137 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1138 SRC0_REL(ABSOLUTE), 1139 SRC0_ELEM(ELEM_X), 1140 SRC0_NEG(0), 1141 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1142 SRC1_REL(ABSOLUTE), 1143 SRC1_ELEM(ELEM_Z), 1144 SRC1_NEG(0), 1145 INDEX_MODE(SQ_INDEX_LOOP), 1146 PRED_SEL(SQ_PRED_SEL_OFF), 1147 LAST(0)); 1148 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1149 SRC2_REL(ABSOLUTE), 1150 SRC2_ELEM(ELEM_X), 1151 SRC2_NEG(0), 1152 ALU_INST(SQ_OP3_INST_MULADD), 1153 BANK_SWIZZLE(SQ_ALU_VEC_012), 1154 DST_GPR(2), 1155 DST_REL(ABSOLUTE), 1156 DST_ELEM(ELEM_X), 1157 CLAMP(1)); 1158 /* r2.y = MAD(c2.y, r1.z, pv.y) */ 1159 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1160 SRC0_REL(ABSOLUTE), 1161 SRC0_ELEM(ELEM_Y), 1162 SRC0_NEG(0), 1163 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1164 SRC1_REL(ABSOLUTE), 1165 SRC1_ELEM(ELEM_Z), 1166 SRC1_NEG(0), 1167 INDEX_MODE(SQ_INDEX_LOOP), 1168 PRED_SEL(SQ_PRED_SEL_OFF), 1169 LAST(0)); 1170 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1171 SRC2_REL(ABSOLUTE), 1172 SRC2_ELEM(ELEM_Y), 1173 SRC2_NEG(0), 1174 ALU_INST(SQ_OP3_INST_MULADD), 1175 BANK_SWIZZLE(SQ_ALU_VEC_012), 1176 DST_GPR(2), 1177 DST_REL(ABSOLUTE), 1178 DST_ELEM(ELEM_Y), 1179 CLAMP(1)); 1180 /* r2.z = MAD(c2.z, r1.z, pv.z) */ 1181 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1182 SRC0_REL(ABSOLUTE), 1183 SRC0_ELEM(ELEM_Z), 1184 SRC0_NEG(0), 1185 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1186 SRC1_REL(ABSOLUTE), 1187 SRC1_ELEM(ELEM_Z), 1188 SRC1_NEG(0), 1189 INDEX_MODE(SQ_INDEX_LOOP), 1190 PRED_SEL(SQ_PRED_SEL_OFF), 1191 LAST(0)); 1192 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1193 SRC2_REL(ABSOLUTE), 1194 SRC2_ELEM(ELEM_Z), 1195 SRC2_NEG(0), 1196 ALU_INST(SQ_OP3_INST_MULADD), 1197 BANK_SWIZZLE(SQ_ALU_VEC_012), 1198 DST_GPR(2), 1199 DST_REL(ABSOLUTE), 1200 DST_ELEM(ELEM_Z), 1201 CLAMP(1)); 1202 /* r2.w = MAD(0, 0, 1) */ 1203 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1204 SRC0_REL(ABSOLUTE), 1205 SRC0_ELEM(ELEM_X), 1206 SRC0_NEG(0), 1207 SRC1_SEL(SQ_ALU_SRC_0), 1208 SRC1_REL(ABSOLUTE), 1209 SRC1_ELEM(ELEM_X), 1210 SRC1_NEG(0), 1211 INDEX_MODE(SQ_INDEX_LOOP), 1212 PRED_SEL(SQ_PRED_SEL_OFF), 1213 LAST(1)); 1214 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1215 SRC2_REL(ABSOLUTE), 1216 SRC2_ELEM(ELEM_X), 1217 SRC2_NEG(0), 1218 ALU_INST(SQ_OP3_INST_MULADD), 1219 BANK_SWIZZLE(SQ_ALU_VEC_012), 1220 DST_GPR(2), 1221 DST_REL(ABSOLUTE), 1222 DST_ELEM(ELEM_W), 1223 CLAMP(1)); 1224 1225 /* 22 */ 1226 shader[i++] = CF_DWORD0(ADDR(24), 1227 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1228 shader[i++] = CF_DWORD1(POP_COUNT(0), 1229 CF_CONST(0), 1230 COND(SQ_CF_COND_ACTIVE), 1231 I_COUNT(3), 1232 VALID_PIXEL_MODE(0), 1233 CF_INST(SQ_CF_INST_TC), 1234 BARRIER(1)); 1235 /* 23 */ 1236 shader[i++] = CF_DWORD0(ADDR(0), 1237 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1238 shader[i++] = CF_DWORD1(POP_COUNT(0), 1239 CF_CONST(0), 1240 COND(SQ_CF_COND_ACTIVE), 1241 I_COUNT(0), 1242 VALID_PIXEL_MODE(0), 1243 CF_INST(SQ_CF_INST_RETURN), 1244 BARRIER(1)); 1245 /* 24/25 */ 1246 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1247 INST_MOD(0), 1248 FETCH_WHOLE_QUAD(0), 1249 RESOURCE_ID(0), 1250 SRC_GPR(0), 1251 SRC_REL(ABSOLUTE), 1252 ALT_CONST(0), 1253 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1254 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1255 shader[i++] = TEX_DWORD1(DST_GPR(1), 1256 DST_REL(ABSOLUTE), 1257 DST_SEL_X(SQ_SEL_X), 1258 DST_SEL_Y(SQ_SEL_MASK), 1259 DST_SEL_Z(SQ_SEL_MASK), 1260 DST_SEL_W(SQ_SEL_1), 1261 LOD_BIAS(0), 1262 COORD_TYPE_X(TEX_NORMALIZED), 1263 COORD_TYPE_Y(TEX_NORMALIZED), 1264 COORD_TYPE_Z(TEX_NORMALIZED), 1265 COORD_TYPE_W(TEX_NORMALIZED)); 1266 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1267 OFFSET_Y(0), 1268 OFFSET_Z(0), 1269 SAMPLER_ID(0), 1270 SRC_SEL_X(SQ_SEL_X), 1271 SRC_SEL_Y(SQ_SEL_Y), 1272 SRC_SEL_Z(SQ_SEL_0), 1273 SRC_SEL_W(SQ_SEL_1)); 1274 shader[i++] = TEX_DWORD_PAD; 1275 /* 26/27 */ 1276 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1277 INST_MOD(0), 1278 FETCH_WHOLE_QUAD(0), 1279 RESOURCE_ID(1), 1280 SRC_GPR(0), 1281 SRC_REL(ABSOLUTE), 1282 ALT_CONST(0), 1283 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1284 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1285 shader[i++] = TEX_DWORD1(DST_GPR(1), 1286 DST_REL(ABSOLUTE), 1287 DST_SEL_X(SQ_SEL_MASK), 1288 DST_SEL_Y(SQ_SEL_MASK), 1289 DST_SEL_Z(SQ_SEL_X), 1290 DST_SEL_W(SQ_SEL_MASK), 1291 LOD_BIAS(0), 1292 COORD_TYPE_X(TEX_NORMALIZED), 1293 COORD_TYPE_Y(TEX_NORMALIZED), 1294 COORD_TYPE_Z(TEX_NORMALIZED), 1295 COORD_TYPE_W(TEX_NORMALIZED)); 1296 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1297 OFFSET_Y(0), 1298 OFFSET_Z(0), 1299 SAMPLER_ID(1), 1300 SRC_SEL_X(SQ_SEL_X), 1301 SRC_SEL_Y(SQ_SEL_Y), 1302 SRC_SEL_Z(SQ_SEL_0), 1303 SRC_SEL_W(SQ_SEL_1)); 1304 shader[i++] = TEX_DWORD_PAD; 1305 /* 28/29 */ 1306 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1307 INST_MOD(0), 1308 FETCH_WHOLE_QUAD(0), 1309 RESOURCE_ID(2), 1310 SRC_GPR(0), 1311 SRC_REL(ABSOLUTE), 1312 ALT_CONST(0), 1313 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1314 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1315 shader[i++] = TEX_DWORD1(DST_GPR(1), 1316 DST_REL(ABSOLUTE), 1317 DST_SEL_X(SQ_SEL_MASK), 1318 DST_SEL_Y(SQ_SEL_X), 1319 DST_SEL_Z(SQ_SEL_MASK), 1320 DST_SEL_W(SQ_SEL_MASK), 1321 LOD_BIAS(0), 1322 COORD_TYPE_X(TEX_NORMALIZED), 1323 COORD_TYPE_Y(TEX_NORMALIZED), 1324 COORD_TYPE_Z(TEX_NORMALIZED), 1325 COORD_TYPE_W(TEX_NORMALIZED)); 1326 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1327 OFFSET_Y(0), 1328 OFFSET_Z(0), 1329 SAMPLER_ID(2), 1330 SRC_SEL_X(SQ_SEL_X), 1331 SRC_SEL_Y(SQ_SEL_Y), 1332 SRC_SEL_Z(SQ_SEL_0), 1333 SRC_SEL_W(SQ_SEL_1)); 1334 shader[i++] = TEX_DWORD_PAD; 1335 /* 30 */ 1336 shader[i++] = CF_DWORD0(ADDR(32), 1337 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1338 shader[i++] = CF_DWORD1(POP_COUNT(0), 1339 CF_CONST(0), 1340 COND(SQ_CF_COND_ACTIVE), 1341 I_COUNT(1), 1342 VALID_PIXEL_MODE(0), 1343 CF_INST(SQ_CF_INST_TC), 1344 BARRIER(1)); 1345 /* 31 */ 1346 shader[i++] = CF_DWORD0(ADDR(0), 1347 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1348 shader[i++] = CF_DWORD1(POP_COUNT(0), 1349 CF_CONST(0), 1350 COND(SQ_CF_COND_ACTIVE), 1351 I_COUNT(0), 1352 VALID_PIXEL_MODE(0), 1353 CF_INST(SQ_CF_INST_RETURN), 1354 BARRIER(1)); 1355 /* 32/33 */ 1356 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1357 INST_MOD(0), 1358 FETCH_WHOLE_QUAD(0), 1359 RESOURCE_ID(0), 1360 SRC_GPR(0), 1361 SRC_REL(ABSOLUTE), 1362 ALT_CONST(0), 1363 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1364 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1365 shader[i++] = TEX_DWORD1(DST_GPR(1), 1366 DST_REL(ABSOLUTE), 1367 DST_SEL_X(SQ_SEL_X), 1368 DST_SEL_Y(SQ_SEL_Y), 1369 DST_SEL_Z(SQ_SEL_Z), 1370 DST_SEL_W(SQ_SEL_1), 1371 LOD_BIAS(0), 1372 COORD_TYPE_X(TEX_NORMALIZED), 1373 COORD_TYPE_Y(TEX_NORMALIZED), 1374 COORD_TYPE_Z(TEX_NORMALIZED), 1375 COORD_TYPE_W(TEX_NORMALIZED)); 1376 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1377 OFFSET_Y(0), 1378 OFFSET_Z(0), 1379 SAMPLER_ID(0), 1380 SRC_SEL_X(SQ_SEL_X), 1381 SRC_SEL_Y(SQ_SEL_Y), 1382 SRC_SEL_Z(SQ_SEL_0), 1383 SRC_SEL_W(SQ_SEL_1)); 1384 shader[i++] = TEX_DWORD_PAD; 1385 1386 return i; 1387} 1388 1389/* comp vs --------------------------------------- */ 1390int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) 1391{ 1392 int i = 0; 1393 1394 /* 0 */ 1395 shader[i++] = CF_DWORD0(ADDR(3), 1396 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1397 shader[i++] = CF_DWORD1(POP_COUNT(0), 1398 CF_CONST(0), 1399 COND(SQ_CF_COND_BOOL), 1400 I_COUNT(0), 1401 VALID_PIXEL_MODE(0), 1402 CF_INST(SQ_CF_INST_CALL), 1403 BARRIER(0)); 1404 /* 1 */ 1405 shader[i++] = CF_DWORD0(ADDR(9), 1406 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1407 shader[i++] = CF_DWORD1(POP_COUNT(0), 1408 CF_CONST(0), 1409 COND(SQ_CF_COND_NOT_BOOL), 1410 I_COUNT(0), 1411 VALID_PIXEL_MODE(0), 1412 CF_INST(SQ_CF_INST_CALL), 1413 BARRIER(0)); 1414 /* 2 - end */ 1415 shader[i++] = CF_DWORD0(ADDR(0), 1416 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1417 shader[i++] = CF_DWORD1(POP_COUNT(0), 1418 CF_CONST(0), 1419 COND(SQ_CF_COND_ACTIVE), 1420 I_COUNT(0), 1421 VALID_PIXEL_MODE(0), 1422 CF_INST(SQ_CF_INST_END), 1423 BARRIER(1)); 1424 /* 3 - mask sub */ 1425 shader[i++] = CF_DWORD0(ADDR(44), 1426 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1427 shader[i++] = CF_DWORD1(POP_COUNT(0), 1428 CF_CONST(0), 1429 COND(SQ_CF_COND_ACTIVE), 1430 I_COUNT(3), 1431 VALID_PIXEL_MODE(0), 1432 CF_INST(SQ_CF_INST_TC), 1433 BARRIER(1)); 1434 1435 /* 4 - ALU */ 1436 shader[i++] = CF_ALU_DWORD0(ADDR(14), 1437 KCACHE_BANK0(0), 1438 KCACHE_BANK1(0), 1439 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 1440 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1441 KCACHE_ADDR0(0), 1442 KCACHE_ADDR1(0), 1443 I_COUNT(20), 1444 ALT_CONST(0), 1445 CF_INST(SQ_CF_INST_ALU), 1446 WHOLE_QUAD_MODE(0), 1447 BARRIER(1)); 1448 1449 /* 5 - dst */ 1450 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1451 TYPE(SQ_EXPORT_POS), 1452 RW_GPR(2), 1453 RW_REL(ABSOLUTE), 1454 INDEX_GPR(0), 1455 ELEM_SIZE(0)); 1456 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1457 SRC_SEL_Y(SQ_SEL_Y), 1458 SRC_SEL_Z(SQ_SEL_0), 1459 SRC_SEL_W(SQ_SEL_1), 1460 BURST_COUNT(1), 1461 VALID_PIXEL_MODE(0), 1462 CF_INST(SQ_CF_INST_EXPORT_DONE), 1463 MARK(0), 1464 BARRIER(1)); 1465 /* 6 - src */ 1466 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1467 TYPE(SQ_EXPORT_PARAM), 1468 RW_GPR(1), 1469 RW_REL(ABSOLUTE), 1470 INDEX_GPR(0), 1471 ELEM_SIZE(0)); 1472 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1473 SRC_SEL_Y(SQ_SEL_Y), 1474 SRC_SEL_Z(SQ_SEL_0), 1475 SRC_SEL_W(SQ_SEL_1), 1476 BURST_COUNT(1), 1477 VALID_PIXEL_MODE(0), 1478 CF_INST(SQ_CF_INST_EXPORT), 1479 MARK(0), 1480 BARRIER(0)); 1481 /* 7 - mask */ 1482 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), 1483 TYPE(SQ_EXPORT_PARAM), 1484 RW_GPR(0), 1485 RW_REL(ABSOLUTE), 1486 INDEX_GPR(0), 1487 ELEM_SIZE(0)); 1488 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1489 SRC_SEL_Y(SQ_SEL_Y), 1490 SRC_SEL_Z(SQ_SEL_0), 1491 SRC_SEL_W(SQ_SEL_1), 1492 BURST_COUNT(1), 1493 VALID_PIXEL_MODE(0), 1494 CF_INST(SQ_CF_INST_EXPORT_DONE), 1495 MARK(0), 1496 BARRIER(0)); 1497 /* 8 */ 1498 shader[i++] = CF_DWORD0(ADDR(0), 1499 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1500 shader[i++] = CF_DWORD1(POP_COUNT(0), 1501 CF_CONST(0), 1502 COND(SQ_CF_COND_ACTIVE), 1503 I_COUNT(0), 1504 VALID_PIXEL_MODE(0), 1505 CF_INST(SQ_CF_INST_RETURN), 1506 BARRIER(1)); 1507 /* 9 - non-mask sub */ 1508 shader[i++] = CF_DWORD0(ADDR(50), 1509 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1510 shader[i++] = CF_DWORD1(POP_COUNT(0), 1511 CF_CONST(0), 1512 COND(SQ_CF_COND_ACTIVE), 1513 I_COUNT(2), 1514 VALID_PIXEL_MODE(0), 1515 CF_INST(SQ_CF_INST_TC), 1516 BARRIER(1)); 1517 1518 /* 10 - ALU */ 1519 shader[i++] = CF_ALU_DWORD0(ADDR(34), 1520 KCACHE_BANK0(0), 1521 KCACHE_BANK1(0), 1522 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 1523 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1524 KCACHE_ADDR0(0), 1525 KCACHE_ADDR1(0), 1526 I_COUNT(10), 1527 ALT_CONST(0), 1528 CF_INST(SQ_CF_INST_ALU), 1529 WHOLE_QUAD_MODE(0), 1530 BARRIER(1)); 1531 1532 /* 11 - dst */ 1533 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1534 TYPE(SQ_EXPORT_POS), 1535 RW_GPR(1), 1536 RW_REL(ABSOLUTE), 1537 INDEX_GPR(0), 1538 ELEM_SIZE(0)); 1539 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1540 SRC_SEL_Y(SQ_SEL_Y), 1541 SRC_SEL_Z(SQ_SEL_0), 1542 SRC_SEL_W(SQ_SEL_1), 1543 BURST_COUNT(0), 1544 VALID_PIXEL_MODE(0), 1545 CF_INST(SQ_CF_INST_EXPORT_DONE), 1546 MARK(0), 1547 BARRIER(1)); 1548 /* 12 - src */ 1549 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1550 TYPE(SQ_EXPORT_PARAM), 1551 RW_GPR(0), 1552 RW_REL(ABSOLUTE), 1553 INDEX_GPR(0), 1554 ELEM_SIZE(0)); 1555 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1556 SRC_SEL_Y(SQ_SEL_Y), 1557 SRC_SEL_Z(SQ_SEL_0), 1558 SRC_SEL_W(SQ_SEL_1), 1559 BURST_COUNT(0), 1560 VALID_PIXEL_MODE(0), 1561 CF_INST(SQ_CF_INST_EXPORT_DONE), 1562 MARK(0), 1563 BARRIER(0)); 1564 /* 13 */ 1565 shader[i++] = CF_DWORD0(ADDR(0), 1566 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1567 shader[i++] = CF_DWORD1(POP_COUNT(0), 1568 CF_CONST(0), 1569 COND(SQ_CF_COND_ACTIVE), 1570 I_COUNT(0), 1571 VALID_PIXEL_MODE(0), 1572 CF_INST(SQ_CF_INST_RETURN), 1573 BARRIER(1)); 1574 1575 /* 14 srcX.x DOT4 - mask */ 1576 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1577 SRC0_REL(ABSOLUTE), 1578 SRC0_ELEM(ELEM_X), 1579 SRC0_NEG(0), 1580 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1581 SRC1_REL(ABSOLUTE), 1582 SRC1_ELEM(ELEM_X), 1583 SRC1_NEG(0), 1584 INDEX_MODE(SQ_INDEX_LOOP), 1585 PRED_SEL(SQ_PRED_SEL_OFF), 1586 LAST(0)); 1587 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1588 SRC1_ABS(0), 1589 UPDATE_EXECUTE_MASK(0), 1590 UPDATE_PRED(0), 1591 WRITE_MASK(1), 1592 OMOD(SQ_ALU_OMOD_OFF), 1593 ALU_INST(SQ_OP2_INST_DOT4), 1594 BANK_SWIZZLE(SQ_ALU_VEC_012), 1595 DST_GPR(3), 1596 DST_REL(ABSOLUTE), 1597 DST_ELEM(ELEM_X), 1598 CLAMP(0)); 1599 1600 /* 15 srcX.y DOT4 - mask */ 1601 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1602 SRC0_REL(ABSOLUTE), 1603 SRC0_ELEM(ELEM_Y), 1604 SRC0_NEG(0), 1605 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1606 SRC1_REL(ABSOLUTE), 1607 SRC1_ELEM(ELEM_Y), 1608 SRC1_NEG(0), 1609 INDEX_MODE(SQ_INDEX_LOOP), 1610 PRED_SEL(SQ_PRED_SEL_OFF), 1611 LAST(0)); 1612 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1613 SRC1_ABS(0), 1614 UPDATE_EXECUTE_MASK(0), 1615 UPDATE_PRED(0), 1616 WRITE_MASK(0), 1617 OMOD(SQ_ALU_OMOD_OFF), 1618 ALU_INST(SQ_OP2_INST_DOT4), 1619 BANK_SWIZZLE(SQ_ALU_VEC_012), 1620 DST_GPR(3), 1621 DST_REL(ABSOLUTE), 1622 DST_ELEM(ELEM_Y), 1623 CLAMP(0)); 1624 1625 /* 16 srcX.z DOT4 - mask */ 1626 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1627 SRC0_REL(ABSOLUTE), 1628 SRC0_ELEM(ELEM_Z), 1629 SRC0_NEG(0), 1630 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1631 SRC1_REL(ABSOLUTE), 1632 SRC1_ELEM(ELEM_Z), 1633 SRC1_NEG(0), 1634 INDEX_MODE(SQ_INDEX_LOOP), 1635 PRED_SEL(SQ_PRED_SEL_OFF), 1636 LAST(0)); 1637 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1638 SRC1_ABS(0), 1639 UPDATE_EXECUTE_MASK(0), 1640 UPDATE_PRED(0), 1641 WRITE_MASK(0), 1642 OMOD(SQ_ALU_OMOD_OFF), 1643 ALU_INST(SQ_OP2_INST_DOT4), 1644 BANK_SWIZZLE(SQ_ALU_VEC_012), 1645 DST_GPR(3), 1646 DST_REL(ABSOLUTE), 1647 DST_ELEM(ELEM_Z), 1648 CLAMP(0)); 1649 1650 /* 17 srcX.w DOT4 - mask */ 1651 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1652 SRC0_REL(ABSOLUTE), 1653 SRC0_ELEM(ELEM_W), 1654 SRC0_NEG(0), 1655 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1656 SRC1_REL(ABSOLUTE), 1657 SRC1_ELEM(ELEM_W), 1658 SRC1_NEG(0), 1659 INDEX_MODE(SQ_INDEX_LOOP), 1660 PRED_SEL(SQ_PRED_SEL_OFF), 1661 LAST(1)); 1662 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1663 SRC1_ABS(0), 1664 UPDATE_EXECUTE_MASK(0), 1665 UPDATE_PRED(0), 1666 WRITE_MASK(0), 1667 OMOD(SQ_ALU_OMOD_OFF), 1668 ALU_INST(SQ_OP2_INST_DOT4), 1669 BANK_SWIZZLE(SQ_ALU_VEC_012), 1670 DST_GPR(3), 1671 DST_REL(ABSOLUTE), 1672 DST_ELEM(ELEM_W), 1673 CLAMP(0)); 1674 1675 /* 18 srcY.x DOT4 - mask */ 1676 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1677 SRC0_REL(ABSOLUTE), 1678 SRC0_ELEM(ELEM_X), 1679 SRC0_NEG(0), 1680 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1681 SRC1_REL(ABSOLUTE), 1682 SRC1_ELEM(ELEM_X), 1683 SRC1_NEG(0), 1684 INDEX_MODE(SQ_INDEX_LOOP), 1685 PRED_SEL(SQ_PRED_SEL_OFF), 1686 LAST(0)); 1687 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1688 SRC1_ABS(0), 1689 UPDATE_EXECUTE_MASK(0), 1690 UPDATE_PRED(0), 1691 WRITE_MASK(0), 1692 OMOD(SQ_ALU_OMOD_OFF), 1693 ALU_INST(SQ_OP2_INST_DOT4), 1694 BANK_SWIZZLE(SQ_ALU_VEC_012), 1695 DST_GPR(3), 1696 DST_REL(ABSOLUTE), 1697 DST_ELEM(ELEM_X), 1698 CLAMP(0)); 1699 1700 /* 19 srcY.y DOT4 - mask */ 1701 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1702 SRC0_REL(ABSOLUTE), 1703 SRC0_ELEM(ELEM_Y), 1704 SRC0_NEG(0), 1705 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1706 SRC1_REL(ABSOLUTE), 1707 SRC1_ELEM(ELEM_Y), 1708 SRC1_NEG(0), 1709 INDEX_MODE(SQ_INDEX_LOOP), 1710 PRED_SEL(SQ_PRED_SEL_OFF), 1711 LAST(0)); 1712 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1713 SRC1_ABS(0), 1714 UPDATE_EXECUTE_MASK(0), 1715 UPDATE_PRED(0), 1716 WRITE_MASK(1), 1717 OMOD(SQ_ALU_OMOD_OFF), 1718 ALU_INST(SQ_OP2_INST_DOT4), 1719 BANK_SWIZZLE(SQ_ALU_VEC_012), 1720 DST_GPR(3), 1721 DST_REL(ABSOLUTE), 1722 DST_ELEM(ELEM_Y), 1723 CLAMP(0)); 1724 1725 /* 20 srcY.z DOT4 - mask */ 1726 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1727 SRC0_REL(ABSOLUTE), 1728 SRC0_ELEM(ELEM_Z), 1729 SRC0_NEG(0), 1730 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1731 SRC1_REL(ABSOLUTE), 1732 SRC1_ELEM(ELEM_Z), 1733 SRC1_NEG(0), 1734 INDEX_MODE(SQ_INDEX_LOOP), 1735 PRED_SEL(SQ_PRED_SEL_OFF), 1736 LAST(0)); 1737 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1738 SRC1_ABS(0), 1739 UPDATE_EXECUTE_MASK(0), 1740 UPDATE_PRED(0), 1741 WRITE_MASK(0), 1742 OMOD(SQ_ALU_OMOD_OFF), 1743 ALU_INST(SQ_OP2_INST_DOT4), 1744 BANK_SWIZZLE(SQ_ALU_VEC_012), 1745 DST_GPR(3), 1746 DST_REL(ABSOLUTE), 1747 DST_ELEM(ELEM_Z), 1748 CLAMP(0)); 1749 1750 /* 21 srcY.w DOT4 - mask */ 1751 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1752 SRC0_REL(ABSOLUTE), 1753 SRC0_ELEM(ELEM_W), 1754 SRC0_NEG(0), 1755 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1756 SRC1_REL(ABSOLUTE), 1757 SRC1_ELEM(ELEM_W), 1758 SRC1_NEG(0), 1759 INDEX_MODE(SQ_INDEX_LOOP), 1760 PRED_SEL(SQ_PRED_SEL_OFF), 1761 LAST(1)); 1762 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1763 SRC1_ABS(0), 1764 UPDATE_EXECUTE_MASK(0), 1765 UPDATE_PRED(0), 1766 WRITE_MASK(0), 1767 OMOD(SQ_ALU_OMOD_OFF), 1768 ALU_INST(SQ_OP2_INST_DOT4), 1769 BANK_SWIZZLE(SQ_ALU_VEC_012), 1770 DST_GPR(3), 1771 DST_REL(ABSOLUTE), 1772 DST_ELEM(ELEM_W), 1773 CLAMP(0)); 1774 1775 /* 22 maskX.x DOT4 - mask */ 1776 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1777 SRC0_REL(ABSOLUTE), 1778 SRC0_ELEM(ELEM_X), 1779 SRC0_NEG(0), 1780 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1781 SRC1_REL(ABSOLUTE), 1782 SRC1_ELEM(ELEM_X), 1783 SRC1_NEG(0), 1784 INDEX_MODE(SQ_INDEX_LOOP), 1785 PRED_SEL(SQ_PRED_SEL_OFF), 1786 LAST(0)); 1787 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1788 SRC1_ABS(0), 1789 UPDATE_EXECUTE_MASK(0), 1790 UPDATE_PRED(0), 1791 WRITE_MASK(1), 1792 OMOD(SQ_ALU_OMOD_OFF), 1793 ALU_INST(SQ_OP2_INST_DOT4), 1794 BANK_SWIZZLE(SQ_ALU_VEC_012), 1795 DST_GPR(4), 1796 DST_REL(ABSOLUTE), 1797 DST_ELEM(ELEM_X), 1798 CLAMP(0)); 1799 1800 /* 23 maskX.y DOT4 - mask */ 1801 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1802 SRC0_REL(ABSOLUTE), 1803 SRC0_ELEM(ELEM_Y), 1804 SRC0_NEG(0), 1805 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1806 SRC1_REL(ABSOLUTE), 1807 SRC1_ELEM(ELEM_Y), 1808 SRC1_NEG(0), 1809 INDEX_MODE(SQ_INDEX_LOOP), 1810 PRED_SEL(SQ_PRED_SEL_OFF), 1811 LAST(0)); 1812 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1813 SRC1_ABS(0), 1814 UPDATE_EXECUTE_MASK(0), 1815 UPDATE_PRED(0), 1816 WRITE_MASK(0), 1817 OMOD(SQ_ALU_OMOD_OFF), 1818 ALU_INST(SQ_OP2_INST_DOT4), 1819 BANK_SWIZZLE(SQ_ALU_VEC_012), 1820 DST_GPR(4), 1821 DST_REL(ABSOLUTE), 1822 DST_ELEM(ELEM_Y), 1823 CLAMP(0)); 1824 1825 /* 24 maskX.z DOT4 - mask */ 1826 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1827 SRC0_REL(ABSOLUTE), 1828 SRC0_ELEM(ELEM_Z), 1829 SRC0_NEG(0), 1830 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1831 SRC1_REL(ABSOLUTE), 1832 SRC1_ELEM(ELEM_Z), 1833 SRC1_NEG(0), 1834 INDEX_MODE(SQ_INDEX_LOOP), 1835 PRED_SEL(SQ_PRED_SEL_OFF), 1836 LAST(0)); 1837 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1838 SRC1_ABS(0), 1839 UPDATE_EXECUTE_MASK(0), 1840 UPDATE_PRED(0), 1841 WRITE_MASK(0), 1842 OMOD(SQ_ALU_OMOD_OFF), 1843 ALU_INST(SQ_OP2_INST_DOT4), 1844 BANK_SWIZZLE(SQ_ALU_VEC_012), 1845 DST_GPR(4), 1846 DST_REL(ABSOLUTE), 1847 DST_ELEM(ELEM_Z), 1848 CLAMP(0)); 1849 1850 /* 25 maskX.w DOT4 - mask */ 1851 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1852 SRC0_REL(ABSOLUTE), 1853 SRC0_ELEM(ELEM_W), 1854 SRC0_NEG(0), 1855 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1856 SRC1_REL(ABSOLUTE), 1857 SRC1_ELEM(ELEM_W), 1858 SRC1_NEG(0), 1859 INDEX_MODE(SQ_INDEX_LOOP), 1860 PRED_SEL(SQ_PRED_SEL_OFF), 1861 LAST(1)); 1862 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1863 SRC1_ABS(0), 1864 UPDATE_EXECUTE_MASK(0), 1865 UPDATE_PRED(0), 1866 WRITE_MASK(0), 1867 OMOD(SQ_ALU_OMOD_OFF), 1868 ALU_INST(SQ_OP2_INST_DOT4), 1869 BANK_SWIZZLE(SQ_ALU_VEC_012), 1870 DST_GPR(4), 1871 DST_REL(ABSOLUTE), 1872 DST_ELEM(ELEM_W), 1873 CLAMP(0)); 1874 1875 /* 26 maskY.x DOT4 - mask */ 1876 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1877 SRC0_REL(ABSOLUTE), 1878 SRC0_ELEM(ELEM_X), 1879 SRC0_NEG(0), 1880 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1881 SRC1_REL(ABSOLUTE), 1882 SRC1_ELEM(ELEM_X), 1883 SRC1_NEG(0), 1884 INDEX_MODE(SQ_INDEX_LOOP), 1885 PRED_SEL(SQ_PRED_SEL_OFF), 1886 LAST(0)); 1887 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1888 SRC1_ABS(0), 1889 UPDATE_EXECUTE_MASK(0), 1890 UPDATE_PRED(0), 1891 WRITE_MASK(0), 1892 OMOD(SQ_ALU_OMOD_OFF), 1893 ALU_INST(SQ_OP2_INST_DOT4), 1894 BANK_SWIZZLE(SQ_ALU_VEC_012), 1895 DST_GPR(4), 1896 DST_REL(ABSOLUTE), 1897 DST_ELEM(ELEM_X), 1898 CLAMP(0)); 1899 1900 /* 27 maskY.y DOT4 - mask */ 1901 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1902 SRC0_REL(ABSOLUTE), 1903 SRC0_ELEM(ELEM_Y), 1904 SRC0_NEG(0), 1905 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1906 SRC1_REL(ABSOLUTE), 1907 SRC1_ELEM(ELEM_Y), 1908 SRC1_NEG(0), 1909 INDEX_MODE(SQ_INDEX_LOOP), 1910 PRED_SEL(SQ_PRED_SEL_OFF), 1911 LAST(0)); 1912 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1913 SRC1_ABS(0), 1914 UPDATE_EXECUTE_MASK(0), 1915 UPDATE_PRED(0), 1916 WRITE_MASK(1), 1917 OMOD(SQ_ALU_OMOD_OFF), 1918 ALU_INST(SQ_OP2_INST_DOT4), 1919 BANK_SWIZZLE(SQ_ALU_VEC_012), 1920 DST_GPR(4), 1921 DST_REL(ABSOLUTE), 1922 DST_ELEM(ELEM_Y), 1923 CLAMP(0)); 1924 1925 /* 28 maskY.z DOT4 - mask */ 1926 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1927 SRC0_REL(ABSOLUTE), 1928 SRC0_ELEM(ELEM_Z), 1929 SRC0_NEG(0), 1930 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1931 SRC1_REL(ABSOLUTE), 1932 SRC1_ELEM(ELEM_Z), 1933 SRC1_NEG(0), 1934 INDEX_MODE(SQ_INDEX_LOOP), 1935 PRED_SEL(SQ_PRED_SEL_OFF), 1936 LAST(0)); 1937 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1938 SRC1_ABS(0), 1939 UPDATE_EXECUTE_MASK(0), 1940 UPDATE_PRED(0), 1941 WRITE_MASK(0), 1942 OMOD(SQ_ALU_OMOD_OFF), 1943 ALU_INST(SQ_OP2_INST_DOT4), 1944 BANK_SWIZZLE(SQ_ALU_VEC_012), 1945 DST_GPR(4), 1946 DST_REL(ABSOLUTE), 1947 DST_ELEM(ELEM_Z), 1948 CLAMP(0)); 1949 1950 /* 29 maskY.w DOT4 - mask */ 1951 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1952 SRC0_REL(ABSOLUTE), 1953 SRC0_ELEM(ELEM_W), 1954 SRC0_NEG(0), 1955 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1956 SRC1_REL(ABSOLUTE), 1957 SRC1_ELEM(ELEM_W), 1958 SRC1_NEG(0), 1959 INDEX_MODE(SQ_INDEX_LOOP), 1960 PRED_SEL(SQ_PRED_SEL_OFF), 1961 LAST(1)); 1962 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1963 SRC1_ABS(0), 1964 UPDATE_EXECUTE_MASK(0), 1965 UPDATE_PRED(0), 1966 WRITE_MASK(0), 1967 OMOD(SQ_ALU_OMOD_OFF), 1968 ALU_INST(SQ_OP2_INST_DOT4), 1969 BANK_SWIZZLE(SQ_ALU_VEC_012), 1970 DST_GPR(4), 1971 DST_REL(ABSOLUTE), 1972 DST_ELEM(ELEM_W), 1973 CLAMP(0)); 1974 1975 /* 30 srcX / w */ 1976 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 1977 SRC0_REL(ABSOLUTE), 1978 SRC0_ELEM(ELEM_X), 1979 SRC0_NEG(0), 1980 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1981 SRC1_REL(ABSOLUTE), 1982 SRC1_ELEM(ELEM_W), 1983 SRC1_NEG(0), 1984 INDEX_MODE(SQ_INDEX_AR_X), 1985 PRED_SEL(SQ_PRED_SEL_OFF), 1986 LAST(1)); 1987 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1988 SRC1_ABS(0), 1989 UPDATE_EXECUTE_MASK(0), 1990 UPDATE_PRED(0), 1991 WRITE_MASK(1), 1992 OMOD(SQ_ALU_OMOD_OFF), 1993 ALU_INST(SQ_OP2_INST_MUL), 1994 BANK_SWIZZLE(SQ_ALU_VEC_012), 1995 DST_GPR(1), 1996 DST_REL(ABSOLUTE), 1997 DST_ELEM(ELEM_X), 1998 CLAMP(0)); 1999 2000 /* 31 srcY / h */ 2001 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 2002 SRC0_REL(ABSOLUTE), 2003 SRC0_ELEM(ELEM_Y), 2004 SRC0_NEG(0), 2005 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2006 SRC1_REL(ABSOLUTE), 2007 SRC1_ELEM(ELEM_W), 2008 SRC1_NEG(0), 2009 INDEX_MODE(SQ_INDEX_AR_X), 2010 PRED_SEL(SQ_PRED_SEL_OFF), 2011 LAST(1)); 2012 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2013 SRC1_ABS(0), 2014 UPDATE_EXECUTE_MASK(0), 2015 UPDATE_PRED(0), 2016 WRITE_MASK(1), 2017 OMOD(SQ_ALU_OMOD_OFF), 2018 ALU_INST(SQ_OP2_INST_MUL), 2019 BANK_SWIZZLE(SQ_ALU_VEC_012), 2020 DST_GPR(1), 2021 DST_REL(ABSOLUTE), 2022 DST_ELEM(ELEM_Y), 2023 CLAMP(0)); 2024 2025 /* 32 maskX / w */ 2026 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 2027 SRC0_REL(ABSOLUTE), 2028 SRC0_ELEM(ELEM_X), 2029 SRC0_NEG(0), 2030 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 2031 SRC1_REL(ABSOLUTE), 2032 SRC1_ELEM(ELEM_W), 2033 SRC1_NEG(0), 2034 INDEX_MODE(SQ_INDEX_AR_X), 2035 PRED_SEL(SQ_PRED_SEL_OFF), 2036 LAST(1)); 2037 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2038 SRC1_ABS(0), 2039 UPDATE_EXECUTE_MASK(0), 2040 UPDATE_PRED(0), 2041 WRITE_MASK(1), 2042 OMOD(SQ_ALU_OMOD_OFF), 2043 ALU_INST(SQ_OP2_INST_MUL), 2044 BANK_SWIZZLE(SQ_ALU_VEC_012), 2045 DST_GPR(0), 2046 DST_REL(ABSOLUTE), 2047 DST_ELEM(ELEM_X), 2048 CLAMP(0)); 2049 2050 /* 33 maskY / h */ 2051 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 2052 SRC0_REL(ABSOLUTE), 2053 SRC0_ELEM(ELEM_Y), 2054 SRC0_NEG(0), 2055 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 2056 SRC1_REL(ABSOLUTE), 2057 SRC1_ELEM(ELEM_W), 2058 SRC1_NEG(0), 2059 INDEX_MODE(SQ_INDEX_AR_X), 2060 PRED_SEL(SQ_PRED_SEL_OFF), 2061 LAST(1)); 2062 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2063 SRC1_ABS(0), 2064 UPDATE_EXECUTE_MASK(0), 2065 UPDATE_PRED(0), 2066 WRITE_MASK(1), 2067 OMOD(SQ_ALU_OMOD_OFF), 2068 ALU_INST(SQ_OP2_INST_MUL), 2069 BANK_SWIZZLE(SQ_ALU_VEC_012), 2070 DST_GPR(0), 2071 DST_REL(ABSOLUTE), 2072 DST_ELEM(ELEM_Y), 2073 CLAMP(0)); 2074 2075 /* 34 srcX.x DOT4 - non-mask */ 2076 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2077 SRC0_REL(ABSOLUTE), 2078 SRC0_ELEM(ELEM_X), 2079 SRC0_NEG(0), 2080 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2081 SRC1_REL(ABSOLUTE), 2082 SRC1_ELEM(ELEM_X), 2083 SRC1_NEG(0), 2084 INDEX_MODE(SQ_INDEX_LOOP), 2085 PRED_SEL(SQ_PRED_SEL_OFF), 2086 LAST(0)); 2087 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2088 SRC1_ABS(0), 2089 UPDATE_EXECUTE_MASK(0), 2090 UPDATE_PRED(0), 2091 WRITE_MASK(1), 2092 OMOD(SQ_ALU_OMOD_OFF), 2093 ALU_INST(SQ_OP2_INST_DOT4), 2094 BANK_SWIZZLE(SQ_ALU_VEC_012), 2095 DST_GPR(2), 2096 DST_REL(ABSOLUTE), 2097 DST_ELEM(ELEM_X), 2098 CLAMP(0)); 2099 2100 /* 35 srcX.y DOT4 - non-mask */ 2101 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2102 SRC0_REL(ABSOLUTE), 2103 SRC0_ELEM(ELEM_Y), 2104 SRC0_NEG(0), 2105 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2106 SRC1_REL(ABSOLUTE), 2107 SRC1_ELEM(ELEM_Y), 2108 SRC1_NEG(0), 2109 INDEX_MODE(SQ_INDEX_LOOP), 2110 PRED_SEL(SQ_PRED_SEL_OFF), 2111 LAST(0)); 2112 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2113 SRC1_ABS(0), 2114 UPDATE_EXECUTE_MASK(0), 2115 UPDATE_PRED(0), 2116 WRITE_MASK(0), 2117 OMOD(SQ_ALU_OMOD_OFF), 2118 ALU_INST(SQ_OP2_INST_DOT4), 2119 BANK_SWIZZLE(SQ_ALU_VEC_012), 2120 DST_GPR(2), 2121 DST_REL(ABSOLUTE), 2122 DST_ELEM(ELEM_Y), 2123 CLAMP(0)); 2124 2125 /* 36 srcX.z DOT4 - non-mask */ 2126 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2127 SRC0_REL(ABSOLUTE), 2128 SRC0_ELEM(ELEM_Z), 2129 SRC0_NEG(0), 2130 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2131 SRC1_REL(ABSOLUTE), 2132 SRC1_ELEM(ELEM_Z), 2133 SRC1_NEG(0), 2134 INDEX_MODE(SQ_INDEX_LOOP), 2135 PRED_SEL(SQ_PRED_SEL_OFF), 2136 LAST(0)); 2137 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2138 SRC1_ABS(0), 2139 UPDATE_EXECUTE_MASK(0), 2140 UPDATE_PRED(0), 2141 WRITE_MASK(0), 2142 OMOD(SQ_ALU_OMOD_OFF), 2143 ALU_INST(SQ_OP2_INST_DOT4), 2144 BANK_SWIZZLE(SQ_ALU_VEC_012), 2145 DST_GPR(2), 2146 DST_REL(ABSOLUTE), 2147 DST_ELEM(ELEM_Z), 2148 CLAMP(0)); 2149 2150 /* 37 srcX.w DOT4 - non-mask */ 2151 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2152 SRC0_REL(ABSOLUTE), 2153 SRC0_ELEM(ELEM_W), 2154 SRC0_NEG(0), 2155 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2156 SRC1_REL(ABSOLUTE), 2157 SRC1_ELEM(ELEM_W), 2158 SRC1_NEG(0), 2159 INDEX_MODE(SQ_INDEX_LOOP), 2160 PRED_SEL(SQ_PRED_SEL_OFF), 2161 LAST(1)); 2162 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2163 SRC1_ABS(0), 2164 UPDATE_EXECUTE_MASK(0), 2165 UPDATE_PRED(0), 2166 WRITE_MASK(0), 2167 OMOD(SQ_ALU_OMOD_OFF), 2168 ALU_INST(SQ_OP2_INST_DOT4), 2169 BANK_SWIZZLE(SQ_ALU_VEC_012), 2170 DST_GPR(2), 2171 DST_REL(ABSOLUTE), 2172 DST_ELEM(ELEM_W), 2173 CLAMP(0)); 2174 2175 /* 38 srcY.x DOT4 - non-mask */ 2176 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2177 SRC0_REL(ABSOLUTE), 2178 SRC0_ELEM(ELEM_X), 2179 SRC0_NEG(0), 2180 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2181 SRC1_REL(ABSOLUTE), 2182 SRC1_ELEM(ELEM_X), 2183 SRC1_NEG(0), 2184 INDEX_MODE(SQ_INDEX_LOOP), 2185 PRED_SEL(SQ_PRED_SEL_OFF), 2186 LAST(0)); 2187 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2188 SRC1_ABS(0), 2189 UPDATE_EXECUTE_MASK(0), 2190 UPDATE_PRED(0), 2191 WRITE_MASK(0), 2192 OMOD(SQ_ALU_OMOD_OFF), 2193 ALU_INST(SQ_OP2_INST_DOT4), 2194 BANK_SWIZZLE(SQ_ALU_VEC_012), 2195 DST_GPR(2), 2196 DST_REL(ABSOLUTE), 2197 DST_ELEM(ELEM_X), 2198 CLAMP(0)); 2199 2200 /* 39 srcY.y DOT4 - non-mask */ 2201 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2202 SRC0_REL(ABSOLUTE), 2203 SRC0_ELEM(ELEM_Y), 2204 SRC0_NEG(0), 2205 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2206 SRC1_REL(ABSOLUTE), 2207 SRC1_ELEM(ELEM_Y), 2208 SRC1_NEG(0), 2209 INDEX_MODE(SQ_INDEX_LOOP), 2210 PRED_SEL(SQ_PRED_SEL_OFF), 2211 LAST(0)); 2212 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2213 SRC1_ABS(0), 2214 UPDATE_EXECUTE_MASK(0), 2215 UPDATE_PRED(0), 2216 WRITE_MASK(1), 2217 OMOD(SQ_ALU_OMOD_OFF), 2218 ALU_INST(SQ_OP2_INST_DOT4), 2219 BANK_SWIZZLE(SQ_ALU_VEC_012), 2220 DST_GPR(2), 2221 DST_REL(ABSOLUTE), 2222 DST_ELEM(ELEM_Y), 2223 CLAMP(0)); 2224 2225 /* 40 srcY.z DOT4 - non-mask */ 2226 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2227 SRC0_REL(ABSOLUTE), 2228 SRC0_ELEM(ELEM_Z), 2229 SRC0_NEG(0), 2230 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2231 SRC1_REL(ABSOLUTE), 2232 SRC1_ELEM(ELEM_Z), 2233 SRC1_NEG(0), 2234 INDEX_MODE(SQ_INDEX_LOOP), 2235 PRED_SEL(SQ_PRED_SEL_OFF), 2236 LAST(0)); 2237 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2238 SRC1_ABS(0), 2239 UPDATE_EXECUTE_MASK(0), 2240 UPDATE_PRED(0), 2241 WRITE_MASK(0), 2242 OMOD(SQ_ALU_OMOD_OFF), 2243 ALU_INST(SQ_OP2_INST_DOT4), 2244 BANK_SWIZZLE(SQ_ALU_VEC_012), 2245 DST_GPR(2), 2246 DST_REL(ABSOLUTE), 2247 DST_ELEM(ELEM_Z), 2248 CLAMP(0)); 2249 2250 /* 41 srcY.w DOT4 - non-mask */ 2251 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2252 SRC0_REL(ABSOLUTE), 2253 SRC0_ELEM(ELEM_W), 2254 SRC0_NEG(0), 2255 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2256 SRC1_REL(ABSOLUTE), 2257 SRC1_ELEM(ELEM_W), 2258 SRC1_NEG(0), 2259 INDEX_MODE(SQ_INDEX_LOOP), 2260 PRED_SEL(SQ_PRED_SEL_OFF), 2261 LAST(1)); 2262 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2263 SRC1_ABS(0), 2264 UPDATE_EXECUTE_MASK(0), 2265 UPDATE_PRED(0), 2266 WRITE_MASK(0), 2267 OMOD(SQ_ALU_OMOD_OFF), 2268 ALU_INST(SQ_OP2_INST_DOT4), 2269 BANK_SWIZZLE(SQ_ALU_VEC_012), 2270 DST_GPR(2), 2271 DST_REL(ABSOLUTE), 2272 DST_ELEM(ELEM_W), 2273 CLAMP(0)); 2274 2275 /* 42 srcX / w */ 2276 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2277 SRC0_REL(ABSOLUTE), 2278 SRC0_ELEM(ELEM_X), 2279 SRC0_NEG(0), 2280 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2281 SRC1_REL(ABSOLUTE), 2282 SRC1_ELEM(ELEM_W), 2283 SRC1_NEG(0), 2284 INDEX_MODE(SQ_INDEX_AR_X), 2285 PRED_SEL(SQ_PRED_SEL_OFF), 2286 LAST(1)); 2287 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2288 SRC1_ABS(0), 2289 UPDATE_EXECUTE_MASK(0), 2290 UPDATE_PRED(0), 2291 WRITE_MASK(1), 2292 OMOD(SQ_ALU_OMOD_OFF), 2293 ALU_INST(SQ_OP2_INST_MUL), 2294 BANK_SWIZZLE(SQ_ALU_VEC_012), 2295 DST_GPR(0), 2296 DST_REL(ABSOLUTE), 2297 DST_ELEM(ELEM_X), 2298 CLAMP(0)); 2299 2300 /* 43 srcY / h */ 2301 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2302 SRC0_REL(ABSOLUTE), 2303 SRC0_ELEM(ELEM_Y), 2304 SRC0_NEG(0), 2305 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2306 SRC1_REL(ABSOLUTE), 2307 SRC1_ELEM(ELEM_W), 2308 SRC1_NEG(0), 2309 INDEX_MODE(SQ_INDEX_AR_X), 2310 PRED_SEL(SQ_PRED_SEL_OFF), 2311 LAST(1)); 2312 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2313 SRC1_ABS(0), 2314 UPDATE_EXECUTE_MASK(0), 2315 UPDATE_PRED(0), 2316 WRITE_MASK(1), 2317 OMOD(SQ_ALU_OMOD_OFF), 2318 ALU_INST(SQ_OP2_INST_MUL), 2319 BANK_SWIZZLE(SQ_ALU_VEC_012), 2320 DST_GPR(0), 2321 DST_REL(ABSOLUTE), 2322 DST_ELEM(ELEM_Y), 2323 CLAMP(0)); 2324 /* mask vfetch - 44/45 - dst */ 2325 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2326 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2327 FETCH_WHOLE_QUAD(0), 2328 BUFFER_ID(0), 2329 SRC_GPR(0), 2330 SRC_REL(ABSOLUTE), 2331 SRC_SEL_X(SQ_SEL_X), 2332 SRC_SEL_Y(SQ_SEL_Y), 2333 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 2334 LDS_REQ(0), 2335 COALESCED_READ(0)); 2336 shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), 2337 DST_REL(0), 2338 DST_SEL_X(SQ_SEL_X), 2339 DST_SEL_Y(SQ_SEL_Y), 2340 DST_SEL_Z(SQ_SEL_0), 2341 DST_SEL_W(SQ_SEL_1), 2342 USE_CONST_FIELDS(0), 2343 DATA_FORMAT(FMT_32_32_FLOAT), 2344 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2345 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2346 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2347 shader[i++] = VTX_DWORD2(OFFSET(0), 2348#if X_BYTE_ORDER == X_BIG_ENDIAN 2349 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2350#else 2351 ENDIAN_SWAP(ENDIAN_NONE), 2352#endif 2353 CONST_BUF_NO_STRIDE(0), 2354 ALT_CONST(0), 2355 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2356 shader[i++] = VTX_DWORD_PAD; 2357 /* 46/47 - src */ 2358 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2359 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2360 FETCH_WHOLE_QUAD(0), 2361 BUFFER_ID(0), 2362 SRC_GPR(0), 2363 SRC_REL(ABSOLUTE), 2364 SRC_SEL_X(SQ_SEL_X), 2365 SRC_SEL_Y(SQ_SEL_Y), 2366 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 2367 LDS_REQ(0), 2368 COALESCED_READ(0)); 2369 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2370 DST_REL(0), 2371 DST_SEL_X(SQ_SEL_X), 2372 DST_SEL_Y(SQ_SEL_Y), 2373 DST_SEL_Z(SQ_SEL_1), 2374 DST_SEL_W(SQ_SEL_0), 2375 USE_CONST_FIELDS(0), 2376 DATA_FORMAT(FMT_32_32_FLOAT), 2377 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2378 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2379 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2380 shader[i++] = VTX_DWORD2(OFFSET(8), 2381#if X_BYTE_ORDER == X_BIG_ENDIAN 2382 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2383#else 2384 ENDIAN_SWAP(ENDIAN_NONE), 2385#endif 2386 CONST_BUF_NO_STRIDE(0), 2387 ALT_CONST(0), 2388 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2389 shader[i++] = VTX_DWORD_PAD; 2390 /* 48/49 - mask */ 2391 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2392 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2393 FETCH_WHOLE_QUAD(0), 2394 BUFFER_ID(0), 2395 SRC_GPR(0), 2396 SRC_REL(ABSOLUTE), 2397 SRC_SEL_X(SQ_SEL_X), 2398 SRC_SEL_Y(SQ_SEL_Y), 2399 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 2400 LDS_REQ(0), 2401 COALESCED_READ(0)); 2402 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2403 DST_REL(0), 2404 DST_SEL_X(SQ_SEL_X), 2405 DST_SEL_Y(SQ_SEL_Y), 2406 DST_SEL_Z(SQ_SEL_1), 2407 DST_SEL_W(SQ_SEL_0), 2408 USE_CONST_FIELDS(0), 2409 DATA_FORMAT(FMT_32_32_FLOAT), 2410 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2411 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2412 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2413 shader[i++] = VTX_DWORD2(OFFSET(16), 2414#if X_BYTE_ORDER == X_BIG_ENDIAN 2415 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2416#else 2417 ENDIAN_SWAP(ENDIAN_NONE), 2418#endif 2419 CONST_BUF_NO_STRIDE(0), 2420 ALT_CONST(0), 2421 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2422 shader[i++] = VTX_DWORD_PAD; 2423 2424 /* no mask vfetch - 50/51 - dst */ 2425 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2426 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2427 FETCH_WHOLE_QUAD(0), 2428 BUFFER_ID(0), 2429 SRC_GPR(0), 2430 SRC_REL(ABSOLUTE), 2431 SRC_SEL_X(SQ_SEL_X), 2432 SRC_SEL_Y(SQ_SEL_Y), 2433 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 2434 LDS_REQ(0), 2435 COALESCED_READ(0)); 2436 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2437 DST_REL(0), 2438 DST_SEL_X(SQ_SEL_X), 2439 DST_SEL_Y(SQ_SEL_Y), 2440 DST_SEL_Z(SQ_SEL_0), 2441 DST_SEL_W(SQ_SEL_1), 2442 USE_CONST_FIELDS(0), 2443 DATA_FORMAT(FMT_32_32_FLOAT), 2444 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2445 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2446 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2447 shader[i++] = VTX_DWORD2(OFFSET(0), 2448#if X_BYTE_ORDER == X_BIG_ENDIAN 2449 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2450#else 2451 ENDIAN_SWAP(ENDIAN_NONE), 2452#endif 2453 CONST_BUF_NO_STRIDE(0), 2454 ALT_CONST(0), 2455 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2456 shader[i++] = VTX_DWORD_PAD; 2457 /* 52/53 - src */ 2458 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2459 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2460 FETCH_WHOLE_QUAD(0), 2461 BUFFER_ID(0), 2462 SRC_GPR(0), 2463 SRC_REL(ABSOLUTE), 2464 SRC_SEL_X(SQ_SEL_X), 2465 SRC_SEL_Y(SQ_SEL_Y), 2466 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 2467 LDS_REQ(0), 2468 COALESCED_READ(0)); 2469 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2470 DST_REL(0), 2471 DST_SEL_X(SQ_SEL_X), 2472 DST_SEL_Y(SQ_SEL_Y), 2473 DST_SEL_Z(SQ_SEL_1), 2474 DST_SEL_W(SQ_SEL_0), 2475 USE_CONST_FIELDS(0), 2476 DATA_FORMAT(FMT_32_32_FLOAT), 2477 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2478 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2479 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2480 shader[i++] = VTX_DWORD2(OFFSET(8), 2481#if X_BYTE_ORDER == X_BIG_ENDIAN 2482 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2483#else 2484 ENDIAN_SWAP(ENDIAN_NONE), 2485#endif 2486 CONST_BUF_NO_STRIDE(0), 2487 ALT_CONST(0), 2488 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2489 shader[i++] = VTX_DWORD_PAD; 2490 2491 return i; 2492} 2493 2494/* comp ps --------------------------------------- */ 2495int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) 2496{ 2497 int i = 0; 2498 2499 /* 0 */ 2500 shader[i++] = CF_DWORD0(ADDR(3), 2501 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2502 shader[i++] = CF_DWORD1(POP_COUNT(0), 2503 CF_CONST(0), 2504 COND(SQ_CF_COND_BOOL), 2505 I_COUNT(0), 2506 VALID_PIXEL_MODE(0), 2507 CF_INST(SQ_CF_INST_CALL), 2508 BARRIER(0)); 2509 /* 1 */ 2510 shader[i++] = CF_DWORD0(ADDR(8), 2511 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2512 shader[i++] = CF_DWORD1(POP_COUNT(0), 2513 CF_CONST(0), 2514 COND(SQ_CF_COND_NOT_BOOL), 2515 I_COUNT(0), 2516 VALID_PIXEL_MODE(0), 2517 CF_INST(SQ_CF_INST_CALL), 2518 BARRIER(0)); 2519 /* 2 - end */ 2520 shader[i++] = CF_DWORD0(ADDR(0), 2521 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2522 shader[i++] = CF_DWORD1(POP_COUNT(0), 2523 CF_CONST(0), 2524 COND(SQ_CF_COND_ACTIVE), 2525 I_COUNT(0), 2526 VALID_PIXEL_MODE(0), 2527 CF_INST(SQ_CF_INST_END), 2528 BARRIER(1)); 2529 /* 3 - mask sub */ 2530 shader[i++] = CF_ALU_DWORD0(ADDR(12), 2531 KCACHE_BANK0(0), 2532 KCACHE_BANK1(0), 2533 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2534 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2535 KCACHE_ADDR0(0), 2536 KCACHE_ADDR1(0), 2537 I_COUNT(8), 2538 ALT_CONST(0), 2539 CF_INST(SQ_CF_INST_ALU), 2540 WHOLE_QUAD_MODE(0), 2541 BARRIER(1)); 2542 2543 /* 4 */ 2544 shader[i++] = CF_DWORD0(ADDR(28), 2545 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2546 shader[i++] = CF_DWORD1(POP_COUNT(0), 2547 CF_CONST(0), 2548 COND(SQ_CF_COND_ACTIVE), 2549 I_COUNT(2), 2550 VALID_PIXEL_MODE(0), 2551 CF_INST(SQ_CF_INST_TC), 2552 BARRIER(1)); 2553 2554 /* 5 */ 2555 shader[i++] = CF_ALU_DWORD0(ADDR(20), 2556 KCACHE_BANK0(0), 2557 KCACHE_BANK1(0), 2558 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 2559 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2560 KCACHE_ADDR0(0), 2561 KCACHE_ADDR1(0), 2562 I_COUNT(4), 2563 ALT_CONST(0), 2564 CF_INST(SQ_CF_INST_ALU), 2565 WHOLE_QUAD_MODE(0), 2566 BARRIER(1)); 2567 2568 /* 6 */ 2569 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2570 TYPE(SQ_EXPORT_PIXEL), 2571 RW_GPR(2), 2572 RW_REL(ABSOLUTE), 2573 INDEX_GPR(0), 2574 ELEM_SIZE(1)); 2575 2576 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2577 SRC_SEL_Y(SQ_SEL_Y), 2578 SRC_SEL_Z(SQ_SEL_Z), 2579 SRC_SEL_W(SQ_SEL_W), 2580 BURST_COUNT(1), 2581 VALID_PIXEL_MODE(0), 2582 CF_INST(SQ_CF_INST_EXPORT_DONE), 2583 MARK(0), 2584 BARRIER(1)); 2585 /* 7 */ 2586 shader[i++] = CF_DWORD0(ADDR(0), 2587 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2588 shader[i++] = CF_DWORD1(POP_COUNT(0), 2589 CF_CONST(0), 2590 COND(SQ_CF_COND_ACTIVE), 2591 I_COUNT(0), 2592 VALID_PIXEL_MODE(0), 2593 CF_INST(SQ_CF_INST_RETURN), 2594 BARRIER(1)); 2595 2596 /* 8 - non-mask sub */ 2597 shader[i++] = CF_ALU_DWORD0(ADDR(24), 2598 KCACHE_BANK0(0), 2599 KCACHE_BANK1(0), 2600 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2601 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2602 KCACHE_ADDR0(0), 2603 KCACHE_ADDR1(0), 2604 I_COUNT(4), 2605 ALT_CONST(0), 2606 CF_INST(SQ_CF_INST_ALU), 2607 WHOLE_QUAD_MODE(0), 2608 BARRIER(1)); 2609 /* 9 */ 2610 shader[i++] = CF_DWORD0(ADDR(32), 2611 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2612 shader[i++] = CF_DWORD1(POP_COUNT(0), 2613 CF_CONST(0), 2614 COND(SQ_CF_COND_ACTIVE), 2615 I_COUNT(1), 2616 VALID_PIXEL_MODE(0), 2617 CF_INST(SQ_CF_INST_TC), 2618 BARRIER(1)); 2619 2620 /* 10 */ 2621 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2622 TYPE(SQ_EXPORT_PIXEL), 2623 RW_GPR(0), 2624 RW_REL(ABSOLUTE), 2625 INDEX_GPR(0), 2626 ELEM_SIZE(1)); 2627 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2628 SRC_SEL_Y(SQ_SEL_Y), 2629 SRC_SEL_Z(SQ_SEL_Z), 2630 SRC_SEL_W(SQ_SEL_W), 2631 BURST_COUNT(1), 2632 VALID_PIXEL_MODE(0), 2633 CF_INST(SQ_CF_INST_EXPORT_DONE), 2634 MARK(0), 2635 BARRIER(1)); 2636 2637 /* 11 */ 2638 shader[i++] = CF_DWORD0(ADDR(0), 2639 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2640 shader[i++] = CF_DWORD1(POP_COUNT(0), 2641 CF_CONST(0), 2642 COND(SQ_CF_COND_ACTIVE), 2643 I_COUNT(0), 2644 VALID_PIXEL_MODE(0), 2645 CF_INST(SQ_CF_INST_RETURN), 2646 BARRIER(1)); 2647 2648 /* 12 interpolate src tex coords - mask */ 2649 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2650 SRC0_REL(ABSOLUTE), 2651 SRC0_ELEM(ELEM_Y), 2652 SRC0_NEG(0), 2653 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2654 SRC1_REL(ABSOLUTE), 2655 SRC1_ELEM(ELEM_X), 2656 SRC1_NEG(0), 2657 INDEX_MODE(SQ_INDEX_AR_X), 2658 PRED_SEL(SQ_PRED_SEL_OFF), 2659 LAST(0)); 2660 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2661 SRC1_ABS(0), 2662 UPDATE_EXECUTE_MASK(0), 2663 UPDATE_PRED(0), 2664 WRITE_MASK(1), 2665 OMOD(SQ_ALU_OMOD_OFF), 2666 ALU_INST(SQ_OP2_INST_INTERP_XY), 2667 BANK_SWIZZLE(SQ_ALU_VEC_210), 2668 DST_GPR(1), 2669 DST_REL(ABSOLUTE), 2670 DST_ELEM(ELEM_X), 2671 CLAMP(0)); 2672 /* 13 */ 2673 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2674 SRC0_REL(ABSOLUTE), 2675 SRC0_ELEM(ELEM_X), 2676 SRC0_NEG(0), 2677 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2678 SRC1_REL(ABSOLUTE), 2679 SRC1_ELEM(ELEM_X), 2680 SRC1_NEG(0), 2681 INDEX_MODE(SQ_INDEX_AR_X), 2682 PRED_SEL(SQ_PRED_SEL_OFF), 2683 LAST(0)); 2684 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2685 SRC1_ABS(0), 2686 UPDATE_EXECUTE_MASK(0), 2687 UPDATE_PRED(0), 2688 WRITE_MASK(1), 2689 OMOD(SQ_ALU_OMOD_OFF), 2690 ALU_INST(SQ_OP2_INST_INTERP_XY), 2691 BANK_SWIZZLE(SQ_ALU_VEC_210), 2692 DST_GPR(1), 2693 DST_REL(ABSOLUTE), 2694 DST_ELEM(ELEM_Y), 2695 CLAMP(0)); 2696 /* 14 */ 2697 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2698 SRC0_REL(ABSOLUTE), 2699 SRC0_ELEM(ELEM_Y), 2700 SRC0_NEG(0), 2701 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2702 SRC1_REL(ABSOLUTE), 2703 SRC1_ELEM(ELEM_X), 2704 SRC1_NEG(0), 2705 INDEX_MODE(SQ_INDEX_AR_X), 2706 PRED_SEL(SQ_PRED_SEL_OFF), 2707 LAST(0)); 2708 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2709 SRC1_ABS(0), 2710 UPDATE_EXECUTE_MASK(0), 2711 UPDATE_PRED(0), 2712 WRITE_MASK(0), 2713 OMOD(SQ_ALU_OMOD_OFF), 2714 ALU_INST(SQ_OP2_INST_INTERP_XY), 2715 BANK_SWIZZLE(SQ_ALU_VEC_210), 2716 DST_GPR(1), 2717 DST_REL(ABSOLUTE), 2718 DST_ELEM(ELEM_Z), 2719 CLAMP(0)); 2720 /* 15 */ 2721 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2722 SRC0_REL(ABSOLUTE), 2723 SRC0_ELEM(ELEM_X), 2724 SRC0_NEG(0), 2725 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2726 SRC1_REL(ABSOLUTE), 2727 SRC1_ELEM(ELEM_X), 2728 SRC1_NEG(0), 2729 INDEX_MODE(SQ_INDEX_AR_X), 2730 PRED_SEL(SQ_PRED_SEL_OFF), 2731 LAST(1)); 2732 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2733 SRC1_ABS(0), 2734 UPDATE_EXECUTE_MASK(0), 2735 UPDATE_PRED(0), 2736 WRITE_MASK(0), 2737 OMOD(SQ_ALU_OMOD_OFF), 2738 ALU_INST(SQ_OP2_INST_INTERP_XY), 2739 BANK_SWIZZLE(SQ_ALU_VEC_210), 2740 DST_GPR(1), 2741 DST_REL(ABSOLUTE), 2742 DST_ELEM(ELEM_W), 2743 CLAMP(0)); 2744 2745 /* 16 interpolate mask tex coords */ 2746 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2747 SRC0_REL(ABSOLUTE), 2748 SRC0_ELEM(ELEM_Y), 2749 SRC0_NEG(0), 2750 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2751 SRC1_REL(ABSOLUTE), 2752 SRC1_ELEM(ELEM_X), 2753 SRC1_NEG(0), 2754 INDEX_MODE(SQ_INDEX_AR_X), 2755 PRED_SEL(SQ_PRED_SEL_OFF), 2756 LAST(0)); 2757 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2758 SRC1_ABS(0), 2759 UPDATE_EXECUTE_MASK(0), 2760 UPDATE_PRED(0), 2761 WRITE_MASK(1), 2762 OMOD(SQ_ALU_OMOD_OFF), 2763 ALU_INST(SQ_OP2_INST_INTERP_XY), 2764 BANK_SWIZZLE(SQ_ALU_VEC_210), 2765 DST_GPR(0), 2766 DST_REL(ABSOLUTE), 2767 DST_ELEM(ELEM_X), 2768 CLAMP(0)); 2769 /* 17 */ 2770 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2771 SRC0_REL(ABSOLUTE), 2772 SRC0_ELEM(ELEM_X), 2773 SRC0_NEG(0), 2774 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2775 SRC1_REL(ABSOLUTE), 2776 SRC1_ELEM(ELEM_X), 2777 SRC1_NEG(0), 2778 INDEX_MODE(SQ_INDEX_AR_X), 2779 PRED_SEL(SQ_PRED_SEL_OFF), 2780 LAST(0)); 2781 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2782 SRC1_ABS(0), 2783 UPDATE_EXECUTE_MASK(0), 2784 UPDATE_PRED(0), 2785 WRITE_MASK(1), 2786 OMOD(SQ_ALU_OMOD_OFF), 2787 ALU_INST(SQ_OP2_INST_INTERP_XY), 2788 BANK_SWIZZLE(SQ_ALU_VEC_210), 2789 DST_GPR(0), 2790 DST_REL(ABSOLUTE), 2791 DST_ELEM(ELEM_Y), 2792 CLAMP(0)); 2793 /* 18 */ 2794 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2795 SRC0_REL(ABSOLUTE), 2796 SRC0_ELEM(ELEM_Y), 2797 SRC0_NEG(0), 2798 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2799 SRC1_REL(ABSOLUTE), 2800 SRC1_ELEM(ELEM_X), 2801 SRC1_NEG(0), 2802 INDEX_MODE(SQ_INDEX_AR_X), 2803 PRED_SEL(SQ_PRED_SEL_OFF), 2804 LAST(0)); 2805 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2806 SRC1_ABS(0), 2807 UPDATE_EXECUTE_MASK(0), 2808 UPDATE_PRED(0), 2809 WRITE_MASK(0), 2810 OMOD(SQ_ALU_OMOD_OFF), 2811 ALU_INST(SQ_OP2_INST_INTERP_XY), 2812 BANK_SWIZZLE(SQ_ALU_VEC_210), 2813 DST_GPR(0), 2814 DST_REL(ABSOLUTE), 2815 DST_ELEM(ELEM_Z), 2816 CLAMP(0)); 2817 /* 19 */ 2818 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2819 SRC0_REL(ABSOLUTE), 2820 SRC0_ELEM(ELEM_X), 2821 SRC0_NEG(0), 2822 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2823 SRC1_REL(ABSOLUTE), 2824 SRC1_ELEM(ELEM_X), 2825 SRC1_NEG(0), 2826 INDEX_MODE(SQ_INDEX_AR_X), 2827 PRED_SEL(SQ_PRED_SEL_OFF), 2828 LAST(1)); 2829 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2830 SRC1_ABS(0), 2831 UPDATE_EXECUTE_MASK(0), 2832 UPDATE_PRED(0), 2833 WRITE_MASK(0), 2834 OMOD(SQ_ALU_OMOD_OFF), 2835 ALU_INST(SQ_OP2_INST_INTERP_XY), 2836 BANK_SWIZZLE(SQ_ALU_VEC_210), 2837 DST_GPR(0), 2838 DST_REL(ABSOLUTE), 2839 DST_ELEM(ELEM_W), 2840 CLAMP(0)); 2841 2842 /* 20 - alu 0 */ 2843 /* MUL gpr[2].x gpr[0].x gpr[1].x */ 2844 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2845 SRC0_REL(ABSOLUTE), 2846 SRC0_ELEM(ELEM_X), 2847 SRC0_NEG(0), 2848 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2849 SRC1_REL(ABSOLUTE), 2850 SRC1_ELEM(ELEM_X), 2851 SRC1_NEG(0), 2852 INDEX_MODE(SQ_INDEX_LOOP), 2853 PRED_SEL(SQ_PRED_SEL_OFF), 2854 LAST(0)); 2855 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2856 SRC1_ABS(0), 2857 UPDATE_EXECUTE_MASK(0), 2858 UPDATE_PRED(0), 2859 WRITE_MASK(1), 2860 OMOD(SQ_ALU_OMOD_OFF), 2861 ALU_INST(SQ_OP2_INST_MUL), 2862 BANK_SWIZZLE(SQ_ALU_VEC_012), 2863 DST_GPR(2), 2864 DST_REL(ABSOLUTE), 2865 DST_ELEM(ELEM_X), 2866 CLAMP(1)); 2867 /* 21 - alu 1 */ 2868 /* MUL gpr[2].y gpr[0].y gpr[1].y */ 2869 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2870 SRC0_REL(ABSOLUTE), 2871 SRC0_ELEM(ELEM_Y), 2872 SRC0_NEG(0), 2873 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2874 SRC1_REL(ABSOLUTE), 2875 SRC1_ELEM(ELEM_Y), 2876 SRC1_NEG(0), 2877 INDEX_MODE(SQ_INDEX_LOOP), 2878 PRED_SEL(SQ_PRED_SEL_OFF), 2879 LAST(0)); 2880 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2881 SRC1_ABS(0), 2882 UPDATE_EXECUTE_MASK(0), 2883 UPDATE_PRED(0), 2884 WRITE_MASK(1), 2885 OMOD(SQ_ALU_OMOD_OFF), 2886 ALU_INST(SQ_OP2_INST_MUL), 2887 BANK_SWIZZLE(SQ_ALU_VEC_012), 2888 DST_GPR(2), 2889 DST_REL(ABSOLUTE), 2890 DST_ELEM(ELEM_Y), 2891 CLAMP(1)); 2892 /* 22 - alu 2 */ 2893 /* MUL gpr[2].z gpr[0].z gpr[1].z */ 2894 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2895 SRC0_REL(ABSOLUTE), 2896 SRC0_ELEM(ELEM_Z), 2897 SRC0_NEG(0), 2898 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2899 SRC1_REL(ABSOLUTE), 2900 SRC1_ELEM(ELEM_Z), 2901 SRC1_NEG(0), 2902 INDEX_MODE(SQ_INDEX_LOOP), 2903 PRED_SEL(SQ_PRED_SEL_OFF), 2904 LAST(0)); 2905 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2906 SRC1_ABS(0), 2907 UPDATE_EXECUTE_MASK(0), 2908 UPDATE_PRED(0), 2909 WRITE_MASK(1), 2910 OMOD(SQ_ALU_OMOD_OFF), 2911 ALU_INST(SQ_OP2_INST_MUL), 2912 BANK_SWIZZLE(SQ_ALU_VEC_012), 2913 DST_GPR(2), 2914 DST_REL(ABSOLUTE), 2915 DST_ELEM(ELEM_Z), 2916 CLAMP(1)); 2917 /* 23 - alu 3 */ 2918 /* MUL gpr[2].w gpr[0].w gpr[1].w */ 2919 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2920 SRC0_REL(ABSOLUTE), 2921 SRC0_ELEM(ELEM_W), 2922 SRC0_NEG(0), 2923 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2924 SRC1_REL(ABSOLUTE), 2925 SRC1_ELEM(ELEM_W), 2926 SRC1_NEG(0), 2927 INDEX_MODE(SQ_INDEX_LOOP), 2928 PRED_SEL(SQ_PRED_SEL_OFF), 2929 LAST(1)); 2930 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2931 SRC1_ABS(0), 2932 UPDATE_EXECUTE_MASK(0), 2933 UPDATE_PRED(0), 2934 WRITE_MASK(1), 2935 OMOD(SQ_ALU_OMOD_OFF), 2936 ALU_INST(SQ_OP2_INST_MUL), 2937 BANK_SWIZZLE(SQ_ALU_VEC_012), 2938 DST_GPR(2), 2939 DST_REL(ABSOLUTE), 2940 DST_ELEM(ELEM_W), 2941 CLAMP(1)); 2942 2943 /* 24 - interpolate tex coords - non-mask */ 2944 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2945 SRC0_REL(ABSOLUTE), 2946 SRC0_ELEM(ELEM_Y), 2947 SRC0_NEG(0), 2948 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2949 SRC1_REL(ABSOLUTE), 2950 SRC1_ELEM(ELEM_X), 2951 SRC1_NEG(0), 2952 INDEX_MODE(SQ_INDEX_AR_X), 2953 PRED_SEL(SQ_PRED_SEL_OFF), 2954 LAST(0)); 2955 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2956 SRC1_ABS(0), 2957 UPDATE_EXECUTE_MASK(0), 2958 UPDATE_PRED(0), 2959 WRITE_MASK(1), 2960 OMOD(SQ_ALU_OMOD_OFF), 2961 ALU_INST(SQ_OP2_INST_INTERP_XY), 2962 BANK_SWIZZLE(SQ_ALU_VEC_210), 2963 DST_GPR(0), 2964 DST_REL(ABSOLUTE), 2965 DST_ELEM(ELEM_X), 2966 CLAMP(0)); 2967 /* 25 */ 2968 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2969 SRC0_REL(ABSOLUTE), 2970 SRC0_ELEM(ELEM_X), 2971 SRC0_NEG(0), 2972 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2973 SRC1_REL(ABSOLUTE), 2974 SRC1_ELEM(ELEM_X), 2975 SRC1_NEG(0), 2976 INDEX_MODE(SQ_INDEX_AR_X), 2977 PRED_SEL(SQ_PRED_SEL_OFF), 2978 LAST(0)); 2979 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2980 SRC1_ABS(0), 2981 UPDATE_EXECUTE_MASK(0), 2982 UPDATE_PRED(0), 2983 WRITE_MASK(1), 2984 OMOD(SQ_ALU_OMOD_OFF), 2985 ALU_INST(SQ_OP2_INST_INTERP_XY), 2986 BANK_SWIZZLE(SQ_ALU_VEC_210), 2987 DST_GPR(0), 2988 DST_REL(ABSOLUTE), 2989 DST_ELEM(ELEM_Y), 2990 CLAMP(0)); 2991 /* 26 */ 2992 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2993 SRC0_REL(ABSOLUTE), 2994 SRC0_ELEM(ELEM_Y), 2995 SRC0_NEG(0), 2996 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2997 SRC1_REL(ABSOLUTE), 2998 SRC1_ELEM(ELEM_X), 2999 SRC1_NEG(0), 3000 INDEX_MODE(SQ_INDEX_AR_X), 3001 PRED_SEL(SQ_PRED_SEL_OFF), 3002 LAST(0)); 3003 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3004 SRC1_ABS(0), 3005 UPDATE_EXECUTE_MASK(0), 3006 UPDATE_PRED(0), 3007 WRITE_MASK(0), 3008 OMOD(SQ_ALU_OMOD_OFF), 3009 ALU_INST(SQ_OP2_INST_INTERP_XY), 3010 BANK_SWIZZLE(SQ_ALU_VEC_210), 3011 DST_GPR(0), 3012 DST_REL(ABSOLUTE), 3013 DST_ELEM(ELEM_Z), 3014 CLAMP(0)); 3015 /* 27 */ 3016 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3017 SRC0_REL(ABSOLUTE), 3018 SRC0_ELEM(ELEM_X), 3019 SRC0_NEG(0), 3020 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 3021 SRC1_REL(ABSOLUTE), 3022 SRC1_ELEM(ELEM_X), 3023 SRC1_NEG(0), 3024 INDEX_MODE(SQ_INDEX_AR_X), 3025 PRED_SEL(SQ_PRED_SEL_OFF), 3026 LAST(1)); 3027 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3028 SRC1_ABS(0), 3029 UPDATE_EXECUTE_MASK(0), 3030 UPDATE_PRED(0), 3031 WRITE_MASK(0), 3032 OMOD(SQ_ALU_OMOD_OFF), 3033 ALU_INST(SQ_OP2_INST_INTERP_XY), 3034 BANK_SWIZZLE(SQ_ALU_VEC_210), 3035 DST_GPR(0), 3036 DST_REL(ABSOLUTE), 3037 DST_ELEM(ELEM_W), 3038 CLAMP(0)); 3039 3040 /* 28/29 - src - mask */ 3041 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3042 INST_MOD(0), 3043 FETCH_WHOLE_QUAD(0), 3044 RESOURCE_ID(0), 3045 SRC_GPR(1), 3046 SRC_REL(ABSOLUTE), 3047 ALT_CONST(0), 3048 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3049 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3050 shader[i++] = TEX_DWORD1(DST_GPR(1), 3051 DST_REL(ABSOLUTE), 3052 DST_SEL_X(SQ_SEL_X), 3053 DST_SEL_Y(SQ_SEL_Y), 3054 DST_SEL_Z(SQ_SEL_Z), 3055 DST_SEL_W(SQ_SEL_W), 3056 LOD_BIAS(0), 3057 COORD_TYPE_X(TEX_NORMALIZED), 3058 COORD_TYPE_Y(TEX_NORMALIZED), 3059 COORD_TYPE_Z(TEX_NORMALIZED), 3060 COORD_TYPE_W(TEX_NORMALIZED)); 3061 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3062 OFFSET_Y(0), 3063 OFFSET_Z(0), 3064 SAMPLER_ID(0), 3065 SRC_SEL_X(SQ_SEL_X), 3066 SRC_SEL_Y(SQ_SEL_Y), 3067 SRC_SEL_Z(SQ_SEL_0), 3068 SRC_SEL_W(SQ_SEL_1)); 3069 shader[i++] = TEX_DWORD_PAD; 3070 /* 30/31 - mask */ 3071 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3072 INST_MOD(0), 3073 FETCH_WHOLE_QUAD(0), 3074 RESOURCE_ID(1), 3075 SRC_GPR(0), 3076 SRC_REL(ABSOLUTE), 3077 ALT_CONST(0), 3078 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3079 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3080 shader[i++] = TEX_DWORD1(DST_GPR(0), 3081 DST_REL(ABSOLUTE), 3082 DST_SEL_X(SQ_SEL_X), 3083 DST_SEL_Y(SQ_SEL_Y), 3084 DST_SEL_Z(SQ_SEL_Z), 3085 DST_SEL_W(SQ_SEL_W), 3086 LOD_BIAS(0), 3087 COORD_TYPE_X(TEX_NORMALIZED), 3088 COORD_TYPE_Y(TEX_NORMALIZED), 3089 COORD_TYPE_Z(TEX_NORMALIZED), 3090 COORD_TYPE_W(TEX_NORMALIZED)); 3091 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3092 OFFSET_Y(0), 3093 OFFSET_Z(0), 3094 SAMPLER_ID(1), 3095 SRC_SEL_X(SQ_SEL_X), 3096 SRC_SEL_Y(SQ_SEL_Y), 3097 SRC_SEL_Z(SQ_SEL_0), 3098 SRC_SEL_W(SQ_SEL_1)); 3099 shader[i++] = TEX_DWORD_PAD; 3100 3101 /* 32/33 - src - non-mask */ 3102 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3103 INST_MOD(0), 3104 FETCH_WHOLE_QUAD(0), 3105 RESOURCE_ID(0), 3106 SRC_GPR(0), 3107 SRC_REL(ABSOLUTE), 3108 ALT_CONST(0), 3109 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3110 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3111 shader[i++] = TEX_DWORD1(DST_GPR(0), 3112 DST_REL(ABSOLUTE), 3113 DST_SEL_X(SQ_SEL_X), 3114 DST_SEL_Y(SQ_SEL_Y), 3115 DST_SEL_Z(SQ_SEL_Z), 3116 DST_SEL_W(SQ_SEL_W), 3117 LOD_BIAS(0), 3118 COORD_TYPE_X(TEX_NORMALIZED), 3119 COORD_TYPE_Y(TEX_NORMALIZED), 3120 COORD_TYPE_Z(TEX_NORMALIZED), 3121 COORD_TYPE_W(TEX_NORMALIZED)); 3122 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3123 OFFSET_Y(0), 3124 OFFSET_Z(0), 3125 SAMPLER_ID(0), 3126 SRC_SEL_X(SQ_SEL_X), 3127 SRC_SEL_Y(SQ_SEL_Y), 3128 SRC_SEL_Z(SQ_SEL_0), 3129 SRC_SEL_W(SQ_SEL_1)); 3130 shader[i++] = TEX_DWORD_PAD; 3131 3132 return i; 3133} 3134 3135#endif 3136