evergreen_shader.c revision 921a55d8
1/* 2 * Copyright 2010 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#ifdef XF86DRM_MODE 32 33#include "xf86.h" 34 35#include "evergreen_shader.h" 36#include "evergreen_reg.h" 37 38/* solid vs --------------------------------------- */ /* evergreen_solid_vs: writes the solid-fill vertex shader into shader[] starting at index 0 and returns the number of 32-bit words written (12 as coded here). Each numbered slot comment below covers two words. Slot 0 is a SQ_CF_INST_VC clause with ADDR(4) and I_COUNT(1); slot 1 exports GPR 1 as SQ_EXPORT_POS; slot 2 exports GPR 0 as SQ_EXPORT_PARAM and carries END_OF_PROGRAM(1); slot 3 is zero padding; slots 4/5 hold one SQ_VTX_INST_FETCH that loads FMT_32_32_FLOAT data into GPR 1. ChipSet is not referenced in the body. NOTE(review): the caller presumably sizes shader[] for at least the returned count -- confirm at the call sites. */ 39int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) 40{ 41 int i = 0; 42 43 /* 0 */ 44 shader[i++] = CF_DWORD0(ADDR(4), 45 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 46 shader[i++] = CF_DWORD1(POP_COUNT(0), 47 CF_CONST(0), 48 COND(SQ_CF_COND_ACTIVE), 49 I_COUNT(1), 50 VALID_PIXEL_MODE(0), 51 END_OF_PROGRAM(0), 52 CF_INST(SQ_CF_INST_VC), 53 WHOLE_QUAD_MODE(0), 54 BARRIER(1)); 55 /* 1 */ 56 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 57 TYPE(SQ_EXPORT_POS), 58 RW_GPR(1), 59 RW_REL(ABSOLUTE), 60 INDEX_GPR(0), 61 ELEM_SIZE(0)); 62 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 63 SRC_SEL_Y(SQ_SEL_Y), 64 SRC_SEL_Z(SQ_SEL_Z), 65 SRC_SEL_W(SQ_SEL_W), 66 BURST_COUNT(1), 67 VALID_PIXEL_MODE(0), 68 END_OF_PROGRAM(0), 69 CF_INST(SQ_CF_INST_EXPORT_DONE), 70 MARK(0), 71 BARRIER(1)); 72 /* 2 - always export a param whether it's used or not */ 73 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 74 TYPE(SQ_EXPORT_PARAM), 75 RW_GPR(0), 76 RW_REL(ABSOLUTE), 77 INDEX_GPR(0), 78 ELEM_SIZE(0)); 79 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 80 SRC_SEL_Y(SQ_SEL_Y), 81 SRC_SEL_Z(SQ_SEL_Z), 82 SRC_SEL_W(SQ_SEL_W), 83 BURST_COUNT(0), 84 VALID_PIXEL_MODE(0), 85 END_OF_PROGRAM(1), 86 CF_INST(SQ_CF_INST_EXPORT_DONE), 87 MARK(0), 88 BARRIER(0)); 89 /* 3 - padding */ 90 shader[i++] = 0x00000000; 91 shader[i++] = 0x00000000; 92 /* 4/5 - vertex fetch; slot 0 above uses ADDR(4) */ 93 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 94 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 95 FETCH_WHOLE_QUAD(0), 96 BUFFER_ID(0), 97 SRC_GPR(0), 98 SRC_REL(ABSOLUTE), 99 SRC_SEL_X(SQ_SEL_X), 100 MEGA_FETCH_COUNT(8)); 101 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 102 DST_REL(0), 103 
DST_SEL_X(SQ_SEL_X), 104 DST_SEL_Y(SQ_SEL_Y), 105 DST_SEL_Z(SQ_SEL_0), 106 DST_SEL_W(SQ_SEL_1), 107 USE_CONST_FIELDS(0), 108 DATA_FORMAT(FMT_32_32_FLOAT), 109 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 110 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 111 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 112 shader[i++] = VTX_DWORD2(OFFSET(0), 113 ENDIAN_SWAP(ENDIAN_NONE), 114 CONST_BUF_NO_STRIDE(0), 115 MEGA_FETCH(1), 116 ALT_CONST(0), 117 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 118 shader[i++] = VTX_DWORD_PAD; 119 120 return i; 121} 122 123/* solid ps --------------------------------------- */ /* evergreen_solid_ps: writes the solid-fill pixel shader into shader[] starting at index 0 and returns the number of 32-bit words written (12 as coded here). Each numbered slot comment below covers two words. Slot 0 is a SQ_CF_INST_ALU clause with ADDR(2), I_COUNT(4) and KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1); slot 1 exports GPR 0 to CF_PIXEL_MRT0 with END_OF_PROGRAM(1); slots 2-5 are four SQ_OP2_INST_MOV instructions whose SRC0 is ALU_SRC_KCACHE0_BASE + 0 element X/Y/Z/W and whose destination is the matching element of GPR 0, each with CLAMP(1). ChipSet is not referenced in the body. NOTE(review): the kcache constant is presumably the fill colour supplied by the caller -- confirm. */ 124int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) 125{ 126 int i = 0; 127 128 /* 0 */ 129 shader[i++] = CF_ALU_DWORD0(ADDR(2), 130 KCACHE_BANK0(0), 131 KCACHE_BANK1(0), 132 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 133 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 134 KCACHE_ADDR0(0), 135 KCACHE_ADDR1(0), 136 I_COUNT(4), 137 ALT_CONST(0), 138 CF_INST(SQ_CF_INST_ALU), 139 WHOLE_QUAD_MODE(0), 140 BARRIER(1)); 141 /* 1 */ 142 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 143 TYPE(SQ_EXPORT_PIXEL), 144 RW_GPR(0), 145 RW_REL(ABSOLUTE), 146 INDEX_GPR(0), 147 ELEM_SIZE(1)); 148 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 149 SRC_SEL_Y(SQ_SEL_Y), 150 SRC_SEL_Z(SQ_SEL_Z), 151 SRC_SEL_W(SQ_SEL_W), 152 BURST_COUNT(1), 153 VALID_PIXEL_MODE(0), 154 END_OF_PROGRAM(1), 155 CF_INST(SQ_CF_INST_EXPORT_DONE), 156 MARK(0), 157 BARRIER(1)); 158 159 /* 2 */ 160 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 161 SRC0_REL(ABSOLUTE), 162 SRC0_ELEM(ELEM_X), 163 SRC0_NEG(0), 164 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 165 SRC1_REL(ABSOLUTE), 166 SRC1_ELEM(ELEM_X), 167 SRC1_NEG(0), 168 INDEX_MODE(SQ_INDEX_AR_X), 169 PRED_SEL(SQ_PRED_SEL_OFF), 170 LAST(0)); 171 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 172 SRC1_ABS(0), 173 UPDATE_EXECUTE_MASK(0), 174 UPDATE_PRED(0), 175 WRITE_MASK(1), 176 OMOD(SQ_ALU_OMOD_OFF), 177 ALU_INST(SQ_OP2_INST_MOV), 178 
BANK_SWIZZLE(SQ_ALU_VEC_012), 179 DST_GPR(0), 180 DST_REL(ABSOLUTE), 181 DST_ELEM(ELEM_X), 182 CLAMP(1)); 183 /* 3 */ 184 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 185 SRC0_REL(ABSOLUTE), 186 SRC0_ELEM(ELEM_Y), 187 SRC0_NEG(0), 188 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 189 SRC1_REL(ABSOLUTE), 190 SRC1_ELEM(ELEM_Y), 191 SRC1_NEG(0), 192 INDEX_MODE(SQ_INDEX_AR_X), 193 PRED_SEL(SQ_PRED_SEL_OFF), 194 LAST(0)); 195 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 196 SRC1_ABS(0), 197 UPDATE_EXECUTE_MASK(0), 198 UPDATE_PRED(0), 199 WRITE_MASK(1), 200 OMOD(SQ_ALU_OMOD_OFF), 201 ALU_INST(SQ_OP2_INST_MOV), 202 BANK_SWIZZLE(SQ_ALU_VEC_012), 203 DST_GPR(0), 204 DST_REL(ABSOLUTE), 205 DST_ELEM(ELEM_Y), 206 CLAMP(1)); 207 /* 4 */ 208 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 209 SRC0_REL(ABSOLUTE), 210 SRC0_ELEM(ELEM_Z), 211 SRC0_NEG(0), 212 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 213 SRC1_REL(ABSOLUTE), 214 SRC1_ELEM(ELEM_Z), 215 SRC1_NEG(0), 216 INDEX_MODE(SQ_INDEX_AR_X), 217 PRED_SEL(SQ_PRED_SEL_OFF), 218 LAST(0)); 219 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 220 SRC1_ABS(0), 221 UPDATE_EXECUTE_MASK(0), 222 UPDATE_PRED(0), 223 WRITE_MASK(1), 224 OMOD(SQ_ALU_OMOD_OFF), 225 ALU_INST(SQ_OP2_INST_MOV), 226 BANK_SWIZZLE(SQ_ALU_VEC_012), 227 DST_GPR(0), 228 DST_REL(ABSOLUTE), 229 DST_ELEM(ELEM_Z), 230 CLAMP(1)); 231 /* 5 */ 232 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 233 SRC0_REL(ABSOLUTE), 234 SRC0_ELEM(ELEM_W), 235 SRC0_NEG(0), 236 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 237 SRC1_REL(ABSOLUTE), 238 SRC1_ELEM(ELEM_W), 239 SRC1_NEG(0), 240 INDEX_MODE(SQ_INDEX_AR_X), 241 PRED_SEL(SQ_PRED_SEL_OFF), 242 LAST(1)); 243 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 244 SRC1_ABS(0), 245 UPDATE_EXECUTE_MASK(0), 246 UPDATE_PRED(0), 247 WRITE_MASK(1), 248 OMOD(SQ_ALU_OMOD_OFF), 249 ALU_INST(SQ_OP2_INST_MOV), 250 BANK_SWIZZLE(SQ_ALU_VEC_012), 251 DST_GPR(0), 252 DST_REL(ABSOLUTE), 253 DST_ELEM(ELEM_W), 254 CLAMP(1)); 255 256 return i; 257} 258 259/* copy vs 
--------------------------------------- */ /* evergreen_copy_vs: writes the copy vertex shader into shader[] starting at index 0 and returns the number of 32-bit words written (16 as coded here). Each numbered slot comment below covers two words. Slot 0 is a SQ_CF_INST_VC clause with ADDR(4) and I_COUNT(2); slot 1 exports GPR 1 as SQ_EXPORT_POS; slot 2 exports GPR 0 as SQ_EXPORT_PARAM and carries END_OF_PROGRAM(1); slot 3 is zero padding; slots 4/5 and 6/7 are two SQ_VTX_INST_FETCH instructions reading FMT_32_32_FLOAT data, the first into GPR 1 at OFFSET(0) and the second into GPR 0 at OFFSET(8). ChipSet is not referenced in the body. NOTE(review): the two fetches presumably correspond to position and texture coordinate of one vertex -- confirm against the vertex buffer layout used by the caller. */ 260int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) 261{ 262 int i = 0; 263 264 /* 0 */ 265 shader[i++] = CF_DWORD0(ADDR(4), 266 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 267 shader[i++] = CF_DWORD1(POP_COUNT(0), 268 CF_CONST(0), 269 COND(SQ_CF_COND_ACTIVE), 270 I_COUNT(2), 271 VALID_PIXEL_MODE(0), 272 END_OF_PROGRAM(0), 273 CF_INST(SQ_CF_INST_VC), 274 WHOLE_QUAD_MODE(0), 275 BARRIER(1)); 276 /* 1 */ 277 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 278 TYPE(SQ_EXPORT_POS), 279 RW_GPR(1), 280 RW_REL(ABSOLUTE), 281 INDEX_GPR(0), 282 ELEM_SIZE(0)); 283 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 284 SRC_SEL_Y(SQ_SEL_Y), 285 SRC_SEL_Z(SQ_SEL_Z), 286 SRC_SEL_W(SQ_SEL_W), 287 BURST_COUNT(0), 288 VALID_PIXEL_MODE(0), 289 END_OF_PROGRAM(0), 290 CF_INST(SQ_CF_INST_EXPORT_DONE), 291 MARK(0), 292 BARRIER(1)); 293 /* 2 */ 294 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 295 TYPE(SQ_EXPORT_PARAM), 296 RW_GPR(0), 297 RW_REL(ABSOLUTE), 298 INDEX_GPR(0), 299 ELEM_SIZE(0)); 300 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 301 SRC_SEL_Y(SQ_SEL_Y), 302 SRC_SEL_Z(SQ_SEL_Z), 303 SRC_SEL_W(SQ_SEL_W), 304 BURST_COUNT(0), 305 VALID_PIXEL_MODE(0), 306 END_OF_PROGRAM(1), 307 CF_INST(SQ_CF_INST_EXPORT_DONE), 308 MARK(0), 309 BARRIER(0)); 310 /* 3 - padding */ 311 shader[i++] = 0x00000000; 312 shader[i++] = 0x00000000; 313 /* 4/5 */ 314 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 315 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 316 FETCH_WHOLE_QUAD(0), 317 BUFFER_ID(0), 318 SRC_GPR(0), 319 SRC_REL(ABSOLUTE), 320 SRC_SEL_X(SQ_SEL_X), 321 MEGA_FETCH_COUNT(16)); 322 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 323 DST_REL(0), 324 DST_SEL_X(SQ_SEL_X), 325 DST_SEL_Y(SQ_SEL_Y), 326 DST_SEL_Z(SQ_SEL_0), 327 DST_SEL_W(SQ_SEL_1), 328 USE_CONST_FIELDS(0), 329 DATA_FORMAT(FMT_32_32_FLOAT), 330 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 331 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 332 
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 333 shader[i++] = VTX_DWORD2(OFFSET(0), 334 ENDIAN_SWAP(ENDIAN_NONE), 335 CONST_BUF_NO_STRIDE(0), 336 MEGA_FETCH(1), 337 ALT_CONST(0), 338 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 339 shader[i++] = VTX_DWORD_PAD; 340 /* 6/7 */ 341 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 342 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 343 FETCH_WHOLE_QUAD(0), 344 BUFFER_ID(0), 345 SRC_GPR(0), 346 SRC_REL(ABSOLUTE), 347 SRC_SEL_X(SQ_SEL_X), 348 MEGA_FETCH_COUNT(8)); 349 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 350 DST_REL(0), 351 DST_SEL_X(SQ_SEL_X), 352 DST_SEL_Y(SQ_SEL_Y), 353 DST_SEL_Z(SQ_SEL_0), 354 DST_SEL_W(SQ_SEL_1), 355 USE_CONST_FIELDS(0), 356 DATA_FORMAT(FMT_32_32_FLOAT), 357 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 358 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 359 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 360 shader[i++] = VTX_DWORD2(OFFSET(8), 361 ENDIAN_SWAP(ENDIAN_NONE), 362 CONST_BUF_NO_STRIDE(0), 363 MEGA_FETCH(0), 364 ALT_CONST(0), 365 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 366 shader[i++] = VTX_DWORD_PAD; 367 368 return i; 369} 370 371/* copy ps --------------------------------------- */ /* evergreen_copy_ps: writes the copy pixel shader into shader[] starting at index 0 and returns the number of 32-bit words written (20 as coded here). Each numbered slot comment below covers two words. CF slot 0 is a SQ_CF_INST_ALU clause with ADDR(3) and I_COUNT(4); CF slot 1 is a SQ_CF_INST_TC clause with ADDR(8) and I_COUNT(1); CF slot 2 exports GPR 0 to CF_PIXEL_MRT0 with END_OF_PROGRAM(1). Slots 3-6 are four SQ_OP2_INST_INTERP_XY instructions targeting GPR 0, of which only the X and Y destinations have WRITE_MASK(1); slot 7 is zero padding; slots 8/9 hold one SQ_TEX_INST_SAMPLE using RESOURCE_ID(0) and SAMPLER_ID(0) with TEX_UNNORMALIZED coordinates, writing GPR 0. ChipSet is not referenced in the body. */ 372int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) 373{ 374 int i = 0; 375 376 /* CF INST 0 */ 377 shader[i++] = CF_ALU_DWORD0(ADDR(3), 378 KCACHE_BANK0(0), 379 KCACHE_BANK1(0), 380 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 381 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 382 KCACHE_ADDR0(0), 383 KCACHE_ADDR1(0), 384 I_COUNT(4), 385 ALT_CONST(0), 386 CF_INST(SQ_CF_INST_ALU), 387 WHOLE_QUAD_MODE(0), 388 BARRIER(1)); 389 /* CF INST 1 */ 390 shader[i++] = CF_DWORD0(ADDR(8), 391 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 392 shader[i++] = CF_DWORD1(POP_COUNT(0), 393 CF_CONST(0), 394 COND(SQ_CF_COND_ACTIVE), 395 I_COUNT(1), 396 VALID_PIXEL_MODE(0), 397 END_OF_PROGRAM(0), 398 CF_INST(SQ_CF_INST_TC), 399 WHOLE_QUAD_MODE(0), 400 BARRIER(1)); 401 /* CF INST 2 */ 402 shader[i++] = 
CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 403 TYPE(SQ_EXPORT_PIXEL), 404 RW_GPR(0), 405 RW_REL(ABSOLUTE), 406 INDEX_GPR(0), 407 ELEM_SIZE(1)); 408 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 409 SRC_SEL_Y(SQ_SEL_Y), 410 SRC_SEL_Z(SQ_SEL_Z), 411 SRC_SEL_W(SQ_SEL_W), 412 BURST_COUNT(1), 413 VALID_PIXEL_MODE(0), 414 END_OF_PROGRAM(1), 415 CF_INST(SQ_CF_INST_EXPORT_DONE), 416 MARK(0), 417 BARRIER(1)); 418 419 /* 3 interpolate tex coords */ 420 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 421 SRC0_REL(ABSOLUTE), 422 SRC0_ELEM(ELEM_Y), 423 SRC0_NEG(0), 424 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 425 SRC1_REL(ABSOLUTE), 426 SRC1_ELEM(ELEM_X), 427 SRC1_NEG(0), 428 INDEX_MODE(SQ_INDEX_AR_X), 429 PRED_SEL(SQ_PRED_SEL_OFF), 430 LAST(0)); 431 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 432 SRC1_ABS(0), 433 UPDATE_EXECUTE_MASK(0), 434 UPDATE_PRED(0), 435 WRITE_MASK(1), 436 OMOD(SQ_ALU_OMOD_OFF), 437 ALU_INST(SQ_OP2_INST_INTERP_XY), 438 BANK_SWIZZLE(SQ_ALU_VEC_210), 439 DST_GPR(0), 440 DST_REL(ABSOLUTE), 441 DST_ELEM(ELEM_X), 442 CLAMP(0)); 443 /* 4 */ 444 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 445 SRC0_REL(ABSOLUTE), 446 SRC0_ELEM(ELEM_X), 447 SRC0_NEG(0), 448 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 449 SRC1_REL(ABSOLUTE), 450 SRC1_ELEM(ELEM_X), 451 SRC1_NEG(0), 452 INDEX_MODE(SQ_INDEX_AR_X), 453 PRED_SEL(SQ_PRED_SEL_OFF), 454 LAST(0)); 455 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 456 SRC1_ABS(0), 457 UPDATE_EXECUTE_MASK(0), 458 UPDATE_PRED(0), 459 WRITE_MASK(1), 460 OMOD(SQ_ALU_OMOD_OFF), 461 ALU_INST(SQ_OP2_INST_INTERP_XY), 462 BANK_SWIZZLE(SQ_ALU_VEC_210), 463 DST_GPR(0), 464 DST_REL(ABSOLUTE), 465 DST_ELEM(ELEM_Y), 466 CLAMP(0)); 467 /* 5 */ 468 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 469 SRC0_REL(ABSOLUTE), 470 SRC0_ELEM(ELEM_Y), 471 SRC0_NEG(0), 472 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 473 SRC1_REL(ABSOLUTE), 474 SRC1_ELEM(ELEM_X), 475 SRC1_NEG(0), 476 INDEX_MODE(SQ_INDEX_AR_X), 477 PRED_SEL(SQ_PRED_SEL_OFF), 478 
LAST(0)); 479 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 480 SRC1_ABS(0), 481 UPDATE_EXECUTE_MASK(0), 482 UPDATE_PRED(0), 483 WRITE_MASK(0), 484 OMOD(SQ_ALU_OMOD_OFF), 485 ALU_INST(SQ_OP2_INST_INTERP_XY), 486 BANK_SWIZZLE(SQ_ALU_VEC_210), 487 DST_GPR(0), 488 DST_REL(ABSOLUTE), 489 DST_ELEM(ELEM_Z), 490 CLAMP(0)); 491 /* 6 */ 492 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 493 SRC0_REL(ABSOLUTE), 494 SRC0_ELEM(ELEM_X), 495 SRC0_NEG(0), 496 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 497 SRC1_REL(ABSOLUTE), 498 SRC1_ELEM(ELEM_X), 499 SRC1_NEG(0), 500 INDEX_MODE(SQ_INDEX_AR_X), 501 PRED_SEL(SQ_PRED_SEL_OFF), 502 LAST(1)); 503 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 504 SRC1_ABS(0), 505 UPDATE_EXECUTE_MASK(0), 506 UPDATE_PRED(0), 507 WRITE_MASK(0), 508 OMOD(SQ_ALU_OMOD_OFF), 509 ALU_INST(SQ_OP2_INST_INTERP_XY), 510 BANK_SWIZZLE(SQ_ALU_VEC_210), 511 DST_GPR(0), 512 DST_REL(ABSOLUTE), 513 DST_ELEM(ELEM_W), 514 CLAMP(0)); 515 516 /* 7 */ 517 shader[i++] = 0x00000000; 518 shader[i++] = 0x00000000; 519 520 /* 8/9 TEX INST 0 */ 521 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 522 INST_MOD(0), 523 FETCH_WHOLE_QUAD(0), 524 RESOURCE_ID(0), 525 SRC_GPR(0), 526 SRC_REL(ABSOLUTE), 527 ALT_CONST(0), 528 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 529 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 530 shader[i++] = TEX_DWORD1(DST_GPR(0), 531 DST_REL(ABSOLUTE), 532 DST_SEL_X(SQ_SEL_X), /* R */ 533 DST_SEL_Y(SQ_SEL_Y), /* G */ 534 DST_SEL_Z(SQ_SEL_Z), /* B */ 535 DST_SEL_W(SQ_SEL_W), /* A */ 536 LOD_BIAS(0), 537 COORD_TYPE_X(TEX_UNNORMALIZED), 538 COORD_TYPE_Y(TEX_UNNORMALIZED), 539 COORD_TYPE_Z(TEX_UNNORMALIZED), 540 COORD_TYPE_W(TEX_UNNORMALIZED)); 541 shader[i++] = TEX_DWORD2(OFFSET_X(0), 542 OFFSET_Y(0), 543 OFFSET_Z(0), 544 SAMPLER_ID(0), 545 SRC_SEL_X(SQ_SEL_X), 546 SRC_SEL_Y(SQ_SEL_Y), 547 SRC_SEL_Z(SQ_SEL_0), 548 SRC_SEL_W(SQ_SEL_1)); 549 shader[i++] = TEX_DWORD_PAD; 550 551 return i; 552} 553 /* evergreen_xv_vs: writes the Xv vertex shader into shader[] starting at index 0 and returns the number of 32-bit words written (20 as coded here). Each numbered slot comment in the body covers two words. Slot 0 is a SQ_CF_INST_VC clause with ADDR(6) and I_COUNT(2); slot 1 is a SQ_CF_INST_ALU clause with ADDR(4), I_COUNT(2) and KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1); slot 2 exports GPR 1 as SQ_EXPORT_POS; slot 3 exports GPR 0 as SQ_EXPORT_PARAM and carries END_OF_PROGRAM(1); slots 4 and 5 are SQ_OP2_INST_MUL instructions multiplying GPR 0 element X/Y by the matching element of ALU_SRC_KCACHE0_BASE + 0 and writing the result back to GPR 0; slots 6/7 and 8/9 are two SQ_VTX_INST_FETCH instructions reading FMT_32_32_FLOAT data into GPR 1 at OFFSET(0) and GPR 0 at OFFSET(8). ChipSet is not referenced in the body. NOTE(review): the in-body comments label slots 4 and 5 as "texX / w" and "texY / h" while the instruction is a MUL, so the constant presumably holds reciprocals of the dimensions -- confirm where the caller fills the constant buffer. */ 554int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) 555{ 556 int i = 
0; 557 558 /* 0 */ 559 shader[i++] = CF_DWORD0(ADDR(6), 560 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 561 shader[i++] = CF_DWORD1(POP_COUNT(0), 562 CF_CONST(0), 563 COND(SQ_CF_COND_ACTIVE), 564 I_COUNT(2), 565 VALID_PIXEL_MODE(0), 566 END_OF_PROGRAM(0), 567 CF_INST(SQ_CF_INST_VC), 568 WHOLE_QUAD_MODE(0), 569 BARRIER(1)); 570 571 /* 1 - ALU */ 572 shader[i++] = CF_ALU_DWORD0(ADDR(4), 573 KCACHE_BANK0(0), 574 KCACHE_BANK1(0), 575 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 576 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 577 KCACHE_ADDR0(0), 578 KCACHE_ADDR1(0), 579 I_COUNT(2), 580 ALT_CONST(0), 581 CF_INST(SQ_CF_INST_ALU), 582 WHOLE_QUAD_MODE(0), 583 BARRIER(1)); 584 585 /* 2 */ 586 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 587 TYPE(SQ_EXPORT_POS), 588 RW_GPR(1), 589 RW_REL(ABSOLUTE), 590 INDEX_GPR(0), 591 ELEM_SIZE(3)); 592 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 593 SRC_SEL_Y(SQ_SEL_Y), 594 SRC_SEL_Z(SQ_SEL_Z), 595 SRC_SEL_W(SQ_SEL_W), 596 BURST_COUNT(1), 597 VALID_PIXEL_MODE(0), 598 END_OF_PROGRAM(0), 599 CF_INST(SQ_CF_INST_EXPORT_DONE), 600 MARK(0), 601 BARRIER(1)); 602 /* 3 */ 603 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 604 TYPE(SQ_EXPORT_PARAM), 605 RW_GPR(0), 606 RW_REL(ABSOLUTE), 607 INDEX_GPR(0), 608 ELEM_SIZE(3)); 609 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 610 SRC_SEL_Y(SQ_SEL_Y), 611 SRC_SEL_Z(SQ_SEL_Z), 612 SRC_SEL_W(SQ_SEL_W), 613 BURST_COUNT(1), 614 VALID_PIXEL_MODE(0), 615 END_OF_PROGRAM(1), 616 CF_INST(SQ_CF_INST_EXPORT_DONE), 617 MARK(0), 618 BARRIER(0)); 619 620 621 /* 4 texX / w */ 622 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 623 SRC0_REL(ABSOLUTE), 624 SRC0_ELEM(ELEM_X), 625 SRC0_NEG(0), 626 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 627 SRC1_REL(ABSOLUTE), 628 SRC1_ELEM(ELEM_X), 629 SRC1_NEG(0), 630 INDEX_MODE(SQ_INDEX_AR_X), 631 PRED_SEL(SQ_PRED_SEL_OFF), 632 LAST(0)); 633 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 634 SRC1_ABS(0), 635 
UPDATE_EXECUTE_MASK(0), 636 UPDATE_PRED(0), 637 WRITE_MASK(1), 638 OMOD(SQ_ALU_OMOD_OFF), 639 ALU_INST(SQ_OP2_INST_MUL), 640 BANK_SWIZZLE(SQ_ALU_VEC_012), 641 DST_GPR(0), 642 DST_REL(ABSOLUTE), 643 DST_ELEM(ELEM_X), 644 CLAMP(0)); 645 646 /* 5 texY / h */ 647 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 648 SRC0_REL(ABSOLUTE), 649 SRC0_ELEM(ELEM_Y), 650 SRC0_NEG(0), 651 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 652 SRC1_REL(ABSOLUTE), 653 SRC1_ELEM(ELEM_Y), 654 SRC1_NEG(0), 655 INDEX_MODE(SQ_INDEX_AR_X), 656 PRED_SEL(SQ_PRED_SEL_OFF), 657 LAST(1)); 658 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 659 SRC1_ABS(0), 660 UPDATE_EXECUTE_MASK(0), 661 UPDATE_PRED(0), 662 WRITE_MASK(1), 663 OMOD(SQ_ALU_OMOD_OFF), 664 ALU_INST(SQ_OP2_INST_MUL), 665 BANK_SWIZZLE(SQ_ALU_VEC_012), 666 DST_GPR(0), 667 DST_REL(ABSOLUTE), 668 DST_ELEM(ELEM_Y), 669 CLAMP(0)); 670 671 /* 6/7 */ 672 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 673 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 674 FETCH_WHOLE_QUAD(0), 675 BUFFER_ID(0), 676 SRC_GPR(0), 677 SRC_REL(ABSOLUTE), 678 SRC_SEL_X(SQ_SEL_X), 679 MEGA_FETCH_COUNT(16)); 680 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 681 DST_REL(ABSOLUTE), 682 DST_SEL_X(SQ_SEL_X), 683 DST_SEL_Y(SQ_SEL_Y), 684 DST_SEL_Z(SQ_SEL_0), 685 DST_SEL_W(SQ_SEL_1), 686 USE_CONST_FIELDS(0), 687 DATA_FORMAT(FMT_32_32_FLOAT), 688 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 689 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 690 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 691 shader[i++] = VTX_DWORD2(OFFSET(0), 692 ENDIAN_SWAP(ENDIAN_NONE), 693 CONST_BUF_NO_STRIDE(0), 694 MEGA_FETCH(1), 695 ALT_CONST(0), 696 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 697 shader[i++] = VTX_DWORD_PAD; 698 /* 8/9 */ 699 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 700 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 701 FETCH_WHOLE_QUAD(0), 702 BUFFER_ID(0), 703 SRC_GPR(0), 704 SRC_REL(ABSOLUTE), 705 SRC_SEL_X(SQ_SEL_X), 706 MEGA_FETCH_COUNT(8)); 707 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 708 DST_REL(ABSOLUTE), 
709 DST_SEL_X(SQ_SEL_X), 710 DST_SEL_Y(SQ_SEL_Y), 711 DST_SEL_Z(SQ_SEL_0), 712 DST_SEL_W(SQ_SEL_1), 713 USE_CONST_FIELDS(0), 714 DATA_FORMAT(FMT_32_32_FLOAT), 715 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 716 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 717 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 718 shader[i++] = VTX_DWORD2(OFFSET(8), 719 ENDIAN_SWAP(ENDIAN_NONE), 720 CONST_BUF_NO_STRIDE(0), 721 MEGA_FETCH(0), 722 ALT_CONST(0), 723 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 724 shader[i++] = VTX_DWORD_PAD; 725 726 return i; 727} 728 729int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) 730{ 731 int i = 0; 732 733 /* 0 */ 734 shader[i++] = CF_ALU_DWORD0(ADDR(5), 735 KCACHE_BANK0(0), 736 KCACHE_BANK1(0), 737 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 738 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 739 KCACHE_ADDR0(0), 740 KCACHE_ADDR1(0), 741 I_COUNT(4), 742 ALT_CONST(0), 743 CF_INST(SQ_CF_INST_ALU), 744 WHOLE_QUAD_MODE(0), 745 BARRIER(1)); 746 /* 1 */ 747 shader[i++] = CF_DWORD0(ADDR(21), 748 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 749 shader[i++] = CF_DWORD1(POP_COUNT(0), 750 CF_CONST(0), 751 COND(SQ_CF_COND_BOOL), 752 I_COUNT(0), 753 VALID_PIXEL_MODE(0), 754 END_OF_PROGRAM(0), 755 CF_INST(SQ_CF_INST_CALL), 756 WHOLE_QUAD_MODE(0), 757 BARRIER(0)); 758 /* 2 */ 759 shader[i++] = CF_DWORD0(ADDR(30), 760 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 761 shader[i++] = CF_DWORD1(POP_COUNT(0), 762 CF_CONST(0), 763 COND(SQ_CF_COND_NOT_BOOL), 764 I_COUNT(0), 765 VALID_PIXEL_MODE(0), 766 END_OF_PROGRAM(0), 767 CF_INST(SQ_CF_INST_CALL), 768 WHOLE_QUAD_MODE(0), 769 BARRIER(0)); 770 /* 3 */ 771 shader[i++] = CF_ALU_DWORD0(ADDR(9), 772 KCACHE_BANK0(0), 773 KCACHE_BANK1(0), 774 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 775 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 776 KCACHE_ADDR0(0), 777 KCACHE_ADDR1(0), 778 I_COUNT(12), 779 ALT_CONST(0), 780 CF_INST(SQ_CF_INST_ALU), 781 WHOLE_QUAD_MODE(0), 782 BARRIER(1)); 783 /* 4 */ 784 shader[i++] = 
CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 785 TYPE(SQ_EXPORT_PIXEL), 786 RW_GPR(2), 787 RW_REL(ABSOLUTE), 788 INDEX_GPR(0), 789 ELEM_SIZE(3)); 790 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 791 SRC_SEL_Y(SQ_SEL_Y), 792 SRC_SEL_Z(SQ_SEL_Z), 793 SRC_SEL_W(SQ_SEL_W), 794 BURST_COUNT(1), 795 VALID_PIXEL_MODE(0), 796 END_OF_PROGRAM(1), 797 CF_INST(SQ_CF_INST_EXPORT_DONE), 798 MARK(0), 799 BARRIER(1)); 800 /* 5 interpolate tex coords */ 801 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 802 SRC0_REL(ABSOLUTE), 803 SRC0_ELEM(ELEM_Y), 804 SRC0_NEG(0), 805 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 806 SRC1_REL(ABSOLUTE), 807 SRC1_ELEM(ELEM_X), 808 SRC1_NEG(0), 809 INDEX_MODE(SQ_INDEX_AR_X), 810 PRED_SEL(SQ_PRED_SEL_OFF), 811 LAST(0)); 812 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 813 SRC1_ABS(0), 814 UPDATE_EXECUTE_MASK(0), 815 UPDATE_PRED(0), 816 WRITE_MASK(1), 817 OMOD(SQ_ALU_OMOD_OFF), 818 ALU_INST(SQ_OP2_INST_INTERP_XY), 819 BANK_SWIZZLE(SQ_ALU_VEC_210), 820 DST_GPR(0), 821 DST_REL(ABSOLUTE), 822 DST_ELEM(ELEM_X), 823 CLAMP(0)); 824 /* 6 */ 825 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 826 SRC0_REL(ABSOLUTE), 827 SRC0_ELEM(ELEM_X), 828 SRC0_NEG(0), 829 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 830 SRC1_REL(ABSOLUTE), 831 SRC1_ELEM(ELEM_X), 832 SRC1_NEG(0), 833 INDEX_MODE(SQ_INDEX_AR_X), 834 PRED_SEL(SQ_PRED_SEL_OFF), 835 LAST(0)); 836 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 837 SRC1_ABS(0), 838 UPDATE_EXECUTE_MASK(0), 839 UPDATE_PRED(0), 840 WRITE_MASK(1), 841 OMOD(SQ_ALU_OMOD_OFF), 842 ALU_INST(SQ_OP2_INST_INTERP_XY), 843 BANK_SWIZZLE(SQ_ALU_VEC_210), 844 DST_GPR(0), 845 DST_REL(ABSOLUTE), 846 DST_ELEM(ELEM_Y), 847 CLAMP(0)); 848 /* 7 */ 849 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 850 SRC0_REL(ABSOLUTE), 851 SRC0_ELEM(ELEM_Y), 852 SRC0_NEG(0), 853 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 854 SRC1_REL(ABSOLUTE), 855 SRC1_ELEM(ELEM_X), 856 SRC1_NEG(0), 857 INDEX_MODE(SQ_INDEX_AR_X), 858 PRED_SEL(SQ_PRED_SEL_OFF), 859 
LAST(0)); 860 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 861 SRC1_ABS(0), 862 UPDATE_EXECUTE_MASK(0), 863 UPDATE_PRED(0), 864 WRITE_MASK(0), 865 OMOD(SQ_ALU_OMOD_OFF), 866 ALU_INST(SQ_OP2_INST_INTERP_XY), 867 BANK_SWIZZLE(SQ_ALU_VEC_210), 868 DST_GPR(0), 869 DST_REL(ABSOLUTE), 870 DST_ELEM(ELEM_Z), 871 CLAMP(0)); 872 /* 8 */ 873 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 874 SRC0_REL(ABSOLUTE), 875 SRC0_ELEM(ELEM_X), 876 SRC0_NEG(0), 877 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 878 SRC1_REL(ABSOLUTE), 879 SRC1_ELEM(ELEM_X), 880 SRC1_NEG(0), 881 INDEX_MODE(SQ_INDEX_AR_X), 882 PRED_SEL(SQ_PRED_SEL_OFF), 883 LAST(1)); 884 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 885 SRC1_ABS(0), 886 UPDATE_EXECUTE_MASK(0), 887 UPDATE_PRED(0), 888 WRITE_MASK(0), 889 OMOD(SQ_ALU_OMOD_OFF), 890 ALU_INST(SQ_OP2_INST_INTERP_XY), 891 BANK_SWIZZLE(SQ_ALU_VEC_210), 892 DST_GPR(0), 893 DST_REL(ABSOLUTE), 894 DST_ELEM(ELEM_W), 895 CLAMP(0)); 896 897 /* 9,10,11,12 */ 898 /* r2.x = MAD(c0.w, r1.x, c0.x) */ 899 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 900 SRC0_REL(ABSOLUTE), 901 SRC0_ELEM(ELEM_W), 902 SRC0_NEG(0), 903 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 904 SRC1_REL(ABSOLUTE), 905 SRC1_ELEM(ELEM_X), 906 SRC1_NEG(0), 907 INDEX_MODE(SQ_INDEX_LOOP), 908 PRED_SEL(SQ_PRED_SEL_OFF), 909 LAST(0)); 910 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 911 SRC2_REL(ABSOLUTE), 912 SRC2_ELEM(ELEM_X), 913 SRC2_NEG(0), 914 ALU_INST(SQ_OP3_INST_MULADD), 915 BANK_SWIZZLE(SQ_ALU_VEC_012), 916 DST_GPR(2), 917 DST_REL(ABSOLUTE), 918 DST_ELEM(ELEM_X), 919 CLAMP(0)); 920 /* r2.y = MAD(c0.w, r1.x, c0.y) */ 921 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 922 SRC0_REL(ABSOLUTE), 923 SRC0_ELEM(ELEM_W), 924 SRC0_NEG(0), 925 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 926 SRC1_REL(ABSOLUTE), 927 SRC1_ELEM(ELEM_X), 928 SRC1_NEG(0), 929 INDEX_MODE(SQ_INDEX_LOOP), 930 PRED_SEL(SQ_PRED_SEL_OFF), 931 LAST(0)); 932 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 933 
SRC2_REL(ABSOLUTE), 934 SRC2_ELEM(ELEM_Y), 935 SRC2_NEG(0), 936 ALU_INST(SQ_OP3_INST_MULADD), 937 BANK_SWIZZLE(SQ_ALU_VEC_012), 938 DST_GPR(2), 939 DST_REL(ABSOLUTE), 940 DST_ELEM(ELEM_Y), 941 CLAMP(0)); 942 /* r2.z = MAD(c0.w, r1.x, c0.z) */ 943 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 944 SRC0_REL(ABSOLUTE), 945 SRC0_ELEM(ELEM_W), 946 SRC0_NEG(0), 947 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 948 SRC1_REL(ABSOLUTE), 949 SRC1_ELEM(ELEM_X), 950 SRC1_NEG(0), 951 INDEX_MODE(SQ_INDEX_LOOP), 952 PRED_SEL(SQ_PRED_SEL_OFF), 953 LAST(0)); 954 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 955 SRC2_REL(ABSOLUTE), 956 SRC2_ELEM(ELEM_Z), 957 SRC2_NEG(0), 958 ALU_INST(SQ_OP3_INST_MULADD), 959 BANK_SWIZZLE(SQ_ALU_VEC_012), 960 DST_GPR(2), 961 DST_REL(ABSOLUTE), 962 DST_ELEM(ELEM_Z), 963 CLAMP(0)); 964 /* r2.w = MAD(0, 0, 1) */ 965 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 966 SRC0_REL(ABSOLUTE), 967 SRC0_ELEM(ELEM_X), 968 SRC0_NEG(0), 969 SRC1_SEL(SQ_ALU_SRC_0), 970 SRC1_REL(ABSOLUTE), 971 SRC1_ELEM(ELEM_X), 972 SRC1_NEG(0), 973 INDEX_MODE(SQ_INDEX_LOOP), 974 PRED_SEL(SQ_PRED_SEL_OFF), 975 LAST(1)); 976 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 977 SRC2_REL(ABSOLUTE), 978 SRC2_ELEM(ELEM_X), 979 SRC2_NEG(0), 980 ALU_INST(SQ_OP3_INST_MULADD), 981 BANK_SWIZZLE(SQ_ALU_VEC_012), 982 DST_GPR(2), 983 DST_REL(ABSOLUTE), 984 DST_ELEM(ELEM_W), 985 CLAMP(0)); 986 987 /* 13,14,15,16 */ 988 /* r2.x = MAD(c1.x, r1.y, pv.x) */ 989 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 990 SRC0_REL(ABSOLUTE), 991 SRC0_ELEM(ELEM_X), 992 SRC0_NEG(0), 993 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 994 SRC1_REL(ABSOLUTE), 995 SRC1_ELEM(ELEM_Y), 996 SRC1_NEG(0), 997 INDEX_MODE(SQ_INDEX_LOOP), 998 PRED_SEL(SQ_PRED_SEL_OFF), 999 LAST(0)); 1000 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1001 SRC2_REL(ABSOLUTE), 1002 SRC2_ELEM(ELEM_X), 1003 SRC2_NEG(0), 1004 ALU_INST(SQ_OP3_INST_MULADD), 1005 BANK_SWIZZLE(SQ_ALU_VEC_012), 1006 DST_GPR(2), 1007 
DST_REL(ABSOLUTE), 1008 DST_ELEM(ELEM_X), 1009 CLAMP(0)); 1010 /* r2.y = MAD(c1.y, r1.y, pv.y) */ 1011 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1012 SRC0_REL(ABSOLUTE), 1013 SRC0_ELEM(ELEM_Y), 1014 SRC0_NEG(0), 1015 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1016 SRC1_REL(ABSOLUTE), 1017 SRC1_ELEM(ELEM_Y), 1018 SRC1_NEG(0), 1019 INDEX_MODE(SQ_INDEX_LOOP), 1020 PRED_SEL(SQ_PRED_SEL_OFF), 1021 LAST(0)); 1022 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1023 SRC2_REL(ABSOLUTE), 1024 SRC2_ELEM(ELEM_Y), 1025 SRC2_NEG(0), 1026 ALU_INST(SQ_OP3_INST_MULADD), 1027 BANK_SWIZZLE(SQ_ALU_VEC_012), 1028 DST_GPR(2), 1029 DST_REL(ABSOLUTE), 1030 DST_ELEM(ELEM_Y), 1031 CLAMP(0)); 1032 /* r2.z = MAD(c1.z, r1.y, pv.z) */ 1033 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1034 SRC0_REL(ABSOLUTE), 1035 SRC0_ELEM(ELEM_Z), 1036 SRC0_NEG(0), 1037 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1038 SRC1_REL(ABSOLUTE), 1039 SRC1_ELEM(ELEM_Y), 1040 SRC1_NEG(0), 1041 INDEX_MODE(SQ_INDEX_LOOP), 1042 PRED_SEL(SQ_PRED_SEL_OFF), 1043 LAST(0)); 1044 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1045 SRC2_REL(ABSOLUTE), 1046 SRC2_ELEM(ELEM_Z), 1047 SRC2_NEG(0), 1048 ALU_INST(SQ_OP3_INST_MULADD), 1049 BANK_SWIZZLE(SQ_ALU_VEC_012), 1050 DST_GPR(2), 1051 DST_REL(ABSOLUTE), 1052 DST_ELEM(ELEM_Z), 1053 CLAMP(0)); 1054 /* r2.w = MAD(0, 0, 1) */ 1055 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1056 SRC0_REL(ABSOLUTE), 1057 SRC0_ELEM(ELEM_X), 1058 SRC0_NEG(0), 1059 SRC1_SEL(SQ_ALU_SRC_0), 1060 SRC1_REL(ABSOLUTE), 1061 SRC1_ELEM(ELEM_X), 1062 SRC1_NEG(0), 1063 INDEX_MODE(SQ_INDEX_LOOP), 1064 PRED_SEL(SQ_PRED_SEL_OFF), 1065 LAST(1)); 1066 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1067 SRC2_REL(ABSOLUTE), 1068 SRC2_ELEM(ELEM_W), 1069 SRC2_NEG(0), 1070 ALU_INST(SQ_OP3_INST_MULADD), 1071 BANK_SWIZZLE(SQ_ALU_VEC_012), 1072 DST_GPR(2), 1073 DST_REL(ABSOLUTE), 1074 DST_ELEM(ELEM_W), 1075 CLAMP(0)); 1076 /* 17,18,19,20 */ 1077 /* r2.x = MAD(c2.x, r1.z, pv.x) */ 1078 
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1079 SRC0_REL(ABSOLUTE), 1080 SRC0_ELEM(ELEM_X), 1081 SRC0_NEG(0), 1082 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1083 SRC1_REL(ABSOLUTE), 1084 SRC1_ELEM(ELEM_Z), 1085 SRC1_NEG(0), 1086 INDEX_MODE(SQ_INDEX_LOOP), 1087 PRED_SEL(SQ_PRED_SEL_OFF), 1088 LAST(0)); 1089 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1090 SRC2_REL(ABSOLUTE), 1091 SRC2_ELEM(ELEM_X), 1092 SRC2_NEG(0), 1093 ALU_INST(SQ_OP3_INST_MULADD), 1094 BANK_SWIZZLE(SQ_ALU_VEC_012), 1095 DST_GPR(2), 1096 DST_REL(ABSOLUTE), 1097 DST_ELEM(ELEM_X), 1098 CLAMP(1)); 1099 /* r2.y = MAD(c2.y, r1.z, pv.y) */ 1100 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1101 SRC0_REL(ABSOLUTE), 1102 SRC0_ELEM(ELEM_Y), 1103 SRC0_NEG(0), 1104 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1105 SRC1_REL(ABSOLUTE), 1106 SRC1_ELEM(ELEM_Z), 1107 SRC1_NEG(0), 1108 INDEX_MODE(SQ_INDEX_LOOP), 1109 PRED_SEL(SQ_PRED_SEL_OFF), 1110 LAST(0)); 1111 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1112 SRC2_REL(ABSOLUTE), 1113 SRC2_ELEM(ELEM_Y), 1114 SRC2_NEG(0), 1115 ALU_INST(SQ_OP3_INST_MULADD), 1116 BANK_SWIZZLE(SQ_ALU_VEC_012), 1117 DST_GPR(2), 1118 DST_REL(ABSOLUTE), 1119 DST_ELEM(ELEM_Y), 1120 CLAMP(1)); 1121 /* r2.z = MAD(c2.z, r1.z, pv.z) */ 1122 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1123 SRC0_REL(ABSOLUTE), 1124 SRC0_ELEM(ELEM_Z), 1125 SRC0_NEG(0), 1126 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1127 SRC1_REL(ABSOLUTE), 1128 SRC1_ELEM(ELEM_Z), 1129 SRC1_NEG(0), 1130 INDEX_MODE(SQ_INDEX_LOOP), 1131 PRED_SEL(SQ_PRED_SEL_OFF), 1132 LAST(0)); 1133 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1134 SRC2_REL(ABSOLUTE), 1135 SRC2_ELEM(ELEM_Z), 1136 SRC2_NEG(0), 1137 ALU_INST(SQ_OP3_INST_MULADD), 1138 BANK_SWIZZLE(SQ_ALU_VEC_012), 1139 DST_GPR(2), 1140 DST_REL(ABSOLUTE), 1141 DST_ELEM(ELEM_Z), 1142 CLAMP(1)); 1143 /* r2.w = MAD(0, 0, 1) */ 1144 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1145 SRC0_REL(ABSOLUTE), 1146 SRC0_ELEM(ELEM_X), 1147 
SRC0_NEG(0), 1148 SRC1_SEL(SQ_ALU_SRC_0), 1149 SRC1_REL(ABSOLUTE), 1150 SRC1_ELEM(ELEM_X), 1151 SRC1_NEG(0), 1152 INDEX_MODE(SQ_INDEX_LOOP), 1153 PRED_SEL(SQ_PRED_SEL_OFF), 1154 LAST(1)); 1155 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1156 SRC2_REL(ABSOLUTE), 1157 SRC2_ELEM(ELEM_X), 1158 SRC2_NEG(0), 1159 ALU_INST(SQ_OP3_INST_MULADD), 1160 BANK_SWIZZLE(SQ_ALU_VEC_012), 1161 DST_GPR(2), 1162 DST_REL(ABSOLUTE), 1163 DST_ELEM(ELEM_W), 1164 CLAMP(1)); 1165 1166 /* 21 */ 1167 shader[i++] = CF_DWORD0(ADDR(24), 1168 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1169 shader[i++] = CF_DWORD1(POP_COUNT(0), 1170 CF_CONST(0), 1171 COND(SQ_CF_COND_ACTIVE), 1172 I_COUNT(3), 1173 VALID_PIXEL_MODE(0), 1174 END_OF_PROGRAM(0), 1175 CF_INST(SQ_CF_INST_TC), 1176 WHOLE_QUAD_MODE(0), 1177 BARRIER(1)); 1178 /* 22 */ 1179 shader[i++] = CF_DWORD0(ADDR(0), 1180 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1181 shader[i++] = CF_DWORD1(POP_COUNT(0), 1182 CF_CONST(0), 1183 COND(SQ_CF_COND_ACTIVE), 1184 I_COUNT(0), 1185 VALID_PIXEL_MODE(0), 1186 END_OF_PROGRAM(0), 1187 CF_INST(SQ_CF_INST_RETURN), 1188 WHOLE_QUAD_MODE(0), 1189 BARRIER(1)); 1190 /* 23 */ 1191 shader[i++] = 0x00000000; 1192 shader[i++] = 0x00000000; 1193 /* 24/25 */ 1194 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1195 INST_MOD(0), 1196 FETCH_WHOLE_QUAD(0), 1197 RESOURCE_ID(0), 1198 SRC_GPR(0), 1199 SRC_REL(ABSOLUTE), 1200 ALT_CONST(0), 1201 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1202 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1203 shader[i++] = TEX_DWORD1(DST_GPR(1), 1204 DST_REL(ABSOLUTE), 1205 DST_SEL_X(SQ_SEL_X), 1206 DST_SEL_Y(SQ_SEL_MASK), 1207 DST_SEL_Z(SQ_SEL_MASK), 1208 DST_SEL_W(SQ_SEL_1), 1209 LOD_BIAS(0), 1210 COORD_TYPE_X(TEX_NORMALIZED), 1211 COORD_TYPE_Y(TEX_NORMALIZED), 1212 COORD_TYPE_Z(TEX_NORMALIZED), 1213 COORD_TYPE_W(TEX_NORMALIZED)); 1214 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1215 OFFSET_Y(0), 1216 OFFSET_Z(0), 1217 SAMPLER_ID(0), 1218 SRC_SEL_X(SQ_SEL_X), 1219 SRC_SEL_Y(SQ_SEL_Y), 1220 
SRC_SEL_Z(SQ_SEL_0), 1221 SRC_SEL_W(SQ_SEL_1)); 1222 shader[i++] = TEX_DWORD_PAD; 1223 /* 26/27 */ 1224 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1225 INST_MOD(0), 1226 FETCH_WHOLE_QUAD(0), 1227 RESOURCE_ID(1), 1228 SRC_GPR(0), 1229 SRC_REL(ABSOLUTE), 1230 ALT_CONST(0), 1231 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1232 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1233 shader[i++] = TEX_DWORD1(DST_GPR(1), 1234 DST_REL(ABSOLUTE), 1235 DST_SEL_X(SQ_SEL_MASK), 1236 DST_SEL_Y(SQ_SEL_MASK), 1237 DST_SEL_Z(SQ_SEL_X), 1238 DST_SEL_W(SQ_SEL_MASK), 1239 LOD_BIAS(0), 1240 COORD_TYPE_X(TEX_NORMALIZED), 1241 COORD_TYPE_Y(TEX_NORMALIZED), 1242 COORD_TYPE_Z(TEX_NORMALIZED), 1243 COORD_TYPE_W(TEX_NORMALIZED)); 1244 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1245 OFFSET_Y(0), 1246 OFFSET_Z(0), 1247 SAMPLER_ID(1), 1248 SRC_SEL_X(SQ_SEL_X), 1249 SRC_SEL_Y(SQ_SEL_Y), 1250 SRC_SEL_Z(SQ_SEL_0), 1251 SRC_SEL_W(SQ_SEL_1)); 1252 shader[i++] = TEX_DWORD_PAD; 1253 /* 28/29 */ 1254 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1255 INST_MOD(0), 1256 FETCH_WHOLE_QUAD(0), 1257 RESOURCE_ID(2), 1258 SRC_GPR(0), 1259 SRC_REL(ABSOLUTE), 1260 ALT_CONST(0), 1261 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1262 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1263 shader[i++] = TEX_DWORD1(DST_GPR(1), 1264 DST_REL(ABSOLUTE), 1265 DST_SEL_X(SQ_SEL_MASK), 1266 DST_SEL_Y(SQ_SEL_X), 1267 DST_SEL_Z(SQ_SEL_MASK), 1268 DST_SEL_W(SQ_SEL_MASK), 1269 LOD_BIAS(0), 1270 COORD_TYPE_X(TEX_NORMALIZED), 1271 COORD_TYPE_Y(TEX_NORMALIZED), 1272 COORD_TYPE_Z(TEX_NORMALIZED), 1273 COORD_TYPE_W(TEX_NORMALIZED)); 1274 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1275 OFFSET_Y(0), 1276 OFFSET_Z(0), 1277 SAMPLER_ID(2), 1278 SRC_SEL_X(SQ_SEL_X), 1279 SRC_SEL_Y(SQ_SEL_Y), 1280 SRC_SEL_Z(SQ_SEL_0), 1281 SRC_SEL_W(SQ_SEL_1)); 1282 shader[i++] = TEX_DWORD_PAD; 1283 /* 30 */ 1284 shader[i++] = CF_DWORD0(ADDR(32), 1285 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1286 shader[i++] = CF_DWORD1(POP_COUNT(0), 1287 CF_CONST(0), 1288 
COND(SQ_CF_COND_ACTIVE), 1289 I_COUNT(2), 1290 VALID_PIXEL_MODE(0), 1291 END_OF_PROGRAM(0), 1292 CF_INST(SQ_CF_INST_TC), 1293 WHOLE_QUAD_MODE(0), 1294 BARRIER(1)); 1295 /* 31 */ 1296 shader[i++] = CF_DWORD0(ADDR(0), 1297 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1298 shader[i++] = CF_DWORD1(POP_COUNT(0), 1299 CF_CONST(0), 1300 COND(SQ_CF_COND_ACTIVE), 1301 I_COUNT(0), 1302 VALID_PIXEL_MODE(0), 1303 END_OF_PROGRAM(0), 1304 CF_INST(SQ_CF_INST_RETURN), 1305 WHOLE_QUAD_MODE(0), 1306 BARRIER(1)); 1307 /* 32/33 */ 1308 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1309 INST_MOD(0), 1310 FETCH_WHOLE_QUAD(0), 1311 RESOURCE_ID(0), 1312 SRC_GPR(0), 1313 SRC_REL(ABSOLUTE), 1314 ALT_CONST(0), 1315 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1316 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1317 shader[i++] = TEX_DWORD1(DST_GPR(1), 1318 DST_REL(ABSOLUTE), 1319 DST_SEL_X(SQ_SEL_X), 1320 DST_SEL_Y(SQ_SEL_MASK), 1321 DST_SEL_Z(SQ_SEL_MASK), 1322 DST_SEL_W(SQ_SEL_1), 1323 LOD_BIAS(0), 1324 COORD_TYPE_X(TEX_NORMALIZED), 1325 COORD_TYPE_Y(TEX_NORMALIZED), 1326 COORD_TYPE_Z(TEX_NORMALIZED), 1327 COORD_TYPE_W(TEX_NORMALIZED)); 1328 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1329 OFFSET_Y(0), 1330 OFFSET_Z(0), 1331 SAMPLER_ID(0), 1332 SRC_SEL_X(SQ_SEL_X), 1333 SRC_SEL_Y(SQ_SEL_Y), 1334 SRC_SEL_Z(SQ_SEL_0), 1335 SRC_SEL_W(SQ_SEL_1)); 1336 shader[i++] = TEX_DWORD_PAD; 1337 /* 34/35 */ 1338 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1339 INST_MOD(0), 1340 FETCH_WHOLE_QUAD(0), 1341 RESOURCE_ID(1), 1342 SRC_GPR(0), 1343 SRC_REL(ABSOLUTE), 1344 ALT_CONST(0), 1345 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1346 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1347 shader[i++] = TEX_DWORD1(DST_GPR(1), 1348 DST_REL(ABSOLUTE), 1349 DST_SEL_X(SQ_SEL_MASK), 1350 DST_SEL_Y(SQ_SEL_X), 1351 DST_SEL_Z(SQ_SEL_Y), 1352 DST_SEL_W(SQ_SEL_MASK), 1353 LOD_BIAS(0), 1354 COORD_TYPE_X(TEX_NORMALIZED), 1355 COORD_TYPE_Y(TEX_NORMALIZED), 1356 COORD_TYPE_Z(TEX_NORMALIZED), 1357 COORD_TYPE_W(TEX_NORMALIZED)); 1358 
shader[i++] = TEX_DWORD2(OFFSET_X(0), 1359 OFFSET_Y(0), 1360 OFFSET_Z(0), 1361 SAMPLER_ID(1), 1362 SRC_SEL_X(SQ_SEL_X), 1363 SRC_SEL_Y(SQ_SEL_Y), 1364 SRC_SEL_Z(SQ_SEL_0), 1365 SRC_SEL_W(SQ_SEL_1)); 1366 shader[i++] = TEX_DWORD_PAD; 1367 1368 return i; 1369} 1370 1371/* comp vs --------------------------------------- */ 1372int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) 1373{ 1374 int i = 0; 1375 1376 /* 0 */ 1377 shader[i++] = CF_DWORD0(ADDR(3), 1378 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1379 shader[i++] = CF_DWORD1(POP_COUNT(0), 1380 CF_CONST(0), 1381 COND(SQ_CF_COND_BOOL), 1382 I_COUNT(0), 1383 VALID_PIXEL_MODE(0), 1384 END_OF_PROGRAM(0), 1385 CF_INST(SQ_CF_INST_CALL), 1386 WHOLE_QUAD_MODE(0), 1387 BARRIER(0)); 1388 /* 1 */ 1389 shader[i++] = CF_DWORD0(ADDR(9), 1390 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1391 shader[i++] = CF_DWORD1(POP_COUNT(0), 1392 CF_CONST(0), 1393 COND(SQ_CF_COND_NOT_BOOL), 1394 I_COUNT(0), 1395 VALID_PIXEL_MODE(0), 1396 END_OF_PROGRAM(0), 1397 CF_INST(SQ_CF_INST_CALL), 1398 WHOLE_QUAD_MODE(0), 1399 BARRIER(0)); 1400 /* 2 */ 1401 shader[i++] = CF_DWORD0(ADDR(0), 1402 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1403 shader[i++] = CF_DWORD1(POP_COUNT(0), 1404 CF_CONST(0), 1405 COND(SQ_CF_COND_ACTIVE), 1406 I_COUNT(0), 1407 VALID_PIXEL_MODE(0), 1408 END_OF_PROGRAM(1), 1409 CF_INST(SQ_CF_INST_NOP), 1410 WHOLE_QUAD_MODE(0), 1411 BARRIER(1)); 1412 /* 3 - mask sub */ 1413 shader[i++] = CF_DWORD0(ADDR(44), 1414 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1415 shader[i++] = CF_DWORD1(POP_COUNT(0), 1416 CF_CONST(0), 1417 COND(SQ_CF_COND_ACTIVE), 1418 I_COUNT(3), 1419 VALID_PIXEL_MODE(0), 1420 END_OF_PROGRAM(0), 1421 CF_INST(SQ_CF_INST_VC), 1422 WHOLE_QUAD_MODE(0), 1423 BARRIER(1)); 1424 1425 /* 4 - ALU */ 1426 shader[i++] = CF_ALU_DWORD0(ADDR(14), 1427 KCACHE_BANK0(0), 1428 KCACHE_BANK1(0), 1429 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 1430 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1431 KCACHE_ADDR0(0), 
1432 KCACHE_ADDR1(0), 1433 I_COUNT(20), 1434 ALT_CONST(0), 1435 CF_INST(SQ_CF_INST_ALU), 1436 WHOLE_QUAD_MODE(0), 1437 BARRIER(1)); 1438 1439 /* 5 - dst */ 1440 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1441 TYPE(SQ_EXPORT_POS), 1442 RW_GPR(2), 1443 RW_REL(ABSOLUTE), 1444 INDEX_GPR(0), 1445 ELEM_SIZE(0)); 1446 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1447 SRC_SEL_Y(SQ_SEL_Y), 1448 SRC_SEL_Z(SQ_SEL_0), 1449 SRC_SEL_W(SQ_SEL_1), 1450 BURST_COUNT(1), 1451 VALID_PIXEL_MODE(0), 1452 END_OF_PROGRAM(0), 1453 CF_INST(SQ_CF_INST_EXPORT_DONE), 1454 MARK(0), 1455 BARRIER(1)); 1456 /* 6 - src */ 1457 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1458 TYPE(SQ_EXPORT_PARAM), 1459 RW_GPR(1), 1460 RW_REL(ABSOLUTE), 1461 INDEX_GPR(0), 1462 ELEM_SIZE(0)); 1463 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1464 SRC_SEL_Y(SQ_SEL_Y), 1465 SRC_SEL_Z(SQ_SEL_0), 1466 SRC_SEL_W(SQ_SEL_1), 1467 BURST_COUNT(1), 1468 VALID_PIXEL_MODE(0), 1469 END_OF_PROGRAM(0), 1470 CF_INST(SQ_CF_INST_EXPORT), 1471 MARK(0), 1472 BARRIER(0)); 1473 /* 7 - mask */ 1474 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), 1475 TYPE(SQ_EXPORT_PARAM), 1476 RW_GPR(0), 1477 RW_REL(ABSOLUTE), 1478 INDEX_GPR(0), 1479 ELEM_SIZE(0)); 1480 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1481 SRC_SEL_Y(SQ_SEL_Y), 1482 SRC_SEL_Z(SQ_SEL_0), 1483 SRC_SEL_W(SQ_SEL_1), 1484 BURST_COUNT(1), 1485 VALID_PIXEL_MODE(0), 1486 END_OF_PROGRAM(0), 1487 CF_INST(SQ_CF_INST_EXPORT_DONE), 1488 WHOLE_QUAD_MODE(0), 1489 BARRIER(0)); 1490 /* 8 */ 1491 shader[i++] = CF_DWORD0(ADDR(0), 1492 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1493 shader[i++] = CF_DWORD1(POP_COUNT(0), 1494 CF_CONST(0), 1495 COND(SQ_CF_COND_ACTIVE), 1496 I_COUNT(0), 1497 VALID_PIXEL_MODE(0), 1498 END_OF_PROGRAM(0), 1499 CF_INST(SQ_CF_INST_RETURN), 1500 WHOLE_QUAD_MODE(0), 1501 BARRIER(1)); 1502 /* 9 - non-mask sub */ 1503 shader[i++] = CF_DWORD0(ADDR(50), 1504 
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1505 shader[i++] = CF_DWORD1(POP_COUNT(0), 1506 CF_CONST(0), 1507 COND(SQ_CF_COND_ACTIVE), 1508 I_COUNT(2), 1509 VALID_PIXEL_MODE(0), 1510 END_OF_PROGRAM(0), 1511 CF_INST(SQ_CF_INST_VC), 1512 WHOLE_QUAD_MODE(0), 1513 BARRIER(1)); 1514 1515 /* 10 - ALU */ 1516 shader[i++] = CF_ALU_DWORD0(ADDR(34), 1517 KCACHE_BANK0(0), 1518 KCACHE_BANK1(0), 1519 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 1520 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1521 KCACHE_ADDR0(0), 1522 KCACHE_ADDR1(0), 1523 I_COUNT(10), 1524 ALT_CONST(0), 1525 CF_INST(SQ_CF_INST_ALU), 1526 WHOLE_QUAD_MODE(0), 1527 BARRIER(1)); 1528 1529 /* 11 - dst */ 1530 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1531 TYPE(SQ_EXPORT_POS), 1532 RW_GPR(1), 1533 RW_REL(ABSOLUTE), 1534 INDEX_GPR(0), 1535 ELEM_SIZE(0)); 1536 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1537 SRC_SEL_Y(SQ_SEL_Y), 1538 SRC_SEL_Z(SQ_SEL_0), 1539 SRC_SEL_W(SQ_SEL_1), 1540 BURST_COUNT(0), 1541 VALID_PIXEL_MODE(0), 1542 END_OF_PROGRAM(0), 1543 CF_INST(SQ_CF_INST_EXPORT_DONE), 1544 MARK(0), 1545 BARRIER(1)); 1546 /* 12 - src */ 1547 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1548 TYPE(SQ_EXPORT_PARAM), 1549 RW_GPR(0), 1550 RW_REL(ABSOLUTE), 1551 INDEX_GPR(0), 1552 ELEM_SIZE(0)); 1553 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1554 SRC_SEL_Y(SQ_SEL_Y), 1555 SRC_SEL_Z(SQ_SEL_0), 1556 SRC_SEL_W(SQ_SEL_1), 1557 BURST_COUNT(0), 1558 VALID_PIXEL_MODE(0), 1559 END_OF_PROGRAM(0), 1560 CF_INST(SQ_CF_INST_EXPORT_DONE), 1561 MARK(0), 1562 BARRIER(0)); 1563 /* 13 */ 1564 shader[i++] = CF_DWORD0(ADDR(0), 1565 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1566 shader[i++] = CF_DWORD1(POP_COUNT(0), 1567 CF_CONST(0), 1568 COND(SQ_CF_COND_ACTIVE), 1569 I_COUNT(0), 1570 VALID_PIXEL_MODE(0), 1571 END_OF_PROGRAM(0), 1572 CF_INST(SQ_CF_INST_RETURN), 1573 WHOLE_QUAD_MODE(0), 1574 BARRIER(1)); 1575 1576 /* 14 srcX.x DOT4 - mask */ 1577 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1578 SRC0_REL(ABSOLUTE), 1579 SRC0_ELEM(ELEM_X), 1580 SRC0_NEG(0), 1581 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1582 SRC1_REL(ABSOLUTE), 1583 SRC1_ELEM(ELEM_X), 1584 SRC1_NEG(0), 1585 INDEX_MODE(SQ_INDEX_LOOP), 1586 PRED_SEL(SQ_PRED_SEL_OFF), 1587 LAST(0)); 1588 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1589 SRC1_ABS(0), 1590 UPDATE_EXECUTE_MASK(0), 1591 UPDATE_PRED(0), 1592 WRITE_MASK(1), 1593 OMOD(SQ_ALU_OMOD_OFF), 1594 ALU_INST(SQ_OP2_INST_DOT4), 1595 BANK_SWIZZLE(SQ_ALU_VEC_012), 1596 DST_GPR(3), 1597 DST_REL(ABSOLUTE), 1598 DST_ELEM(ELEM_X), 1599 CLAMP(0)); 1600 1601 /* 15 srcX.y DOT4 - mask */ 1602 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1603 SRC0_REL(ABSOLUTE), 1604 SRC0_ELEM(ELEM_Y), 1605 SRC0_NEG(0), 1606 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1607 SRC1_REL(ABSOLUTE), 1608 SRC1_ELEM(ELEM_Y), 1609 SRC1_NEG(0), 1610 INDEX_MODE(SQ_INDEX_LOOP), 1611 PRED_SEL(SQ_PRED_SEL_OFF), 1612 LAST(0)); 1613 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1614 SRC1_ABS(0), 1615 UPDATE_EXECUTE_MASK(0), 1616 UPDATE_PRED(0), 1617 WRITE_MASK(0), 1618 OMOD(SQ_ALU_OMOD_OFF), 1619 ALU_INST(SQ_OP2_INST_DOT4), 1620 BANK_SWIZZLE(SQ_ALU_VEC_012), 1621 DST_GPR(3), 1622 DST_REL(ABSOLUTE), 1623 DST_ELEM(ELEM_Y), 1624 CLAMP(0)); 1625 1626 /* 16 srcX.z DOT4 - mask */ 1627 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1628 SRC0_REL(ABSOLUTE), 1629 SRC0_ELEM(ELEM_Z), 1630 SRC0_NEG(0), 1631 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1632 SRC1_REL(ABSOLUTE), 1633 SRC1_ELEM(ELEM_Z), 1634 SRC1_NEG(0), 1635 INDEX_MODE(SQ_INDEX_LOOP), 1636 PRED_SEL(SQ_PRED_SEL_OFF), 1637 LAST(0)); 1638 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1639 SRC1_ABS(0), 1640 UPDATE_EXECUTE_MASK(0), 1641 UPDATE_PRED(0), 1642 WRITE_MASK(0), 1643 OMOD(SQ_ALU_OMOD_OFF), 1644 ALU_INST(SQ_OP2_INST_DOT4), 1645 BANK_SWIZZLE(SQ_ALU_VEC_012), 1646 DST_GPR(3), 1647 DST_REL(ABSOLUTE), 1648 DST_ELEM(ELEM_Z), 1649 CLAMP(0)); 1650 1651 /* 17 srcX.w DOT4 - mask */ 1652 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1653 SRC0_REL(ABSOLUTE), 1654 SRC0_ELEM(ELEM_W), 1655 SRC0_NEG(0), 1656 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1657 SRC1_REL(ABSOLUTE), 1658 SRC1_ELEM(ELEM_W), 1659 SRC1_NEG(0), 1660 INDEX_MODE(SQ_INDEX_LOOP), 1661 PRED_SEL(SQ_PRED_SEL_OFF), 1662 LAST(1)); 1663 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1664 SRC1_ABS(0), 1665 UPDATE_EXECUTE_MASK(0), 1666 UPDATE_PRED(0), 1667 WRITE_MASK(0), 1668 OMOD(SQ_ALU_OMOD_OFF), 1669 ALU_INST(SQ_OP2_INST_DOT4), 1670 BANK_SWIZZLE(SQ_ALU_VEC_012), 1671 DST_GPR(3), 1672 DST_REL(ABSOLUTE), 1673 DST_ELEM(ELEM_W), 1674 CLAMP(0)); 1675 1676 /* 18 srcY.x DOT4 - mask */ 1677 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1678 SRC0_REL(ABSOLUTE), 1679 SRC0_ELEM(ELEM_X), 1680 SRC0_NEG(0), 1681 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1682 SRC1_REL(ABSOLUTE), 1683 SRC1_ELEM(ELEM_X), 1684 SRC1_NEG(0), 1685 INDEX_MODE(SQ_INDEX_LOOP), 1686 PRED_SEL(SQ_PRED_SEL_OFF), 1687 LAST(0)); 1688 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1689 SRC1_ABS(0), 1690 UPDATE_EXECUTE_MASK(0), 1691 UPDATE_PRED(0), 1692 WRITE_MASK(0), 1693 OMOD(SQ_ALU_OMOD_OFF), 1694 ALU_INST(SQ_OP2_INST_DOT4), 1695 BANK_SWIZZLE(SQ_ALU_VEC_012), 1696 DST_GPR(3), 1697 DST_REL(ABSOLUTE), 1698 DST_ELEM(ELEM_X), 1699 CLAMP(0)); 1700 1701 /* 19 srcY.y DOT4 - mask */ 1702 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1703 SRC0_REL(ABSOLUTE), 1704 SRC0_ELEM(ELEM_Y), 1705 SRC0_NEG(0), 1706 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1707 SRC1_REL(ABSOLUTE), 1708 SRC1_ELEM(ELEM_Y), 1709 SRC1_NEG(0), 1710 INDEX_MODE(SQ_INDEX_LOOP), 1711 PRED_SEL(SQ_PRED_SEL_OFF), 1712 LAST(0)); 1713 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1714 SRC1_ABS(0), 1715 UPDATE_EXECUTE_MASK(0), 1716 UPDATE_PRED(0), 1717 WRITE_MASK(1), 1718 OMOD(SQ_ALU_OMOD_OFF), 1719 ALU_INST(SQ_OP2_INST_DOT4), 1720 BANK_SWIZZLE(SQ_ALU_VEC_012), 1721 DST_GPR(3), 1722 DST_REL(ABSOLUTE), 1723 DST_ELEM(ELEM_Y), 1724 CLAMP(0)); 1725 1726 /* 20 srcY.z DOT4 - mask */ 1727 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1728 SRC0_REL(ABSOLUTE), 1729 SRC0_ELEM(ELEM_Z), 1730 SRC0_NEG(0), 1731 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1732 SRC1_REL(ABSOLUTE), 1733 SRC1_ELEM(ELEM_Z), 1734 SRC1_NEG(0), 1735 INDEX_MODE(SQ_INDEX_LOOP), 1736 PRED_SEL(SQ_PRED_SEL_OFF), 1737 LAST(0)); 1738 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1739 SRC1_ABS(0), 1740 UPDATE_EXECUTE_MASK(0), 1741 UPDATE_PRED(0), 1742 WRITE_MASK(0), 1743 OMOD(SQ_ALU_OMOD_OFF), 1744 ALU_INST(SQ_OP2_INST_DOT4), 1745 BANK_SWIZZLE(SQ_ALU_VEC_012), 1746 DST_GPR(3), 1747 DST_REL(ABSOLUTE), 1748 DST_ELEM(ELEM_Z), 1749 CLAMP(0)); 1750 1751 /* 21 srcY.w DOT4 - mask */ 1752 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1753 SRC0_REL(ABSOLUTE), 1754 SRC0_ELEM(ELEM_W), 1755 SRC0_NEG(0), 1756 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1757 SRC1_REL(ABSOLUTE), 1758 SRC1_ELEM(ELEM_W), 1759 SRC1_NEG(0), 1760 INDEX_MODE(SQ_INDEX_LOOP), 1761 PRED_SEL(SQ_PRED_SEL_OFF), 1762 LAST(1)); 1763 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1764 SRC1_ABS(0), 1765 UPDATE_EXECUTE_MASK(0), 1766 UPDATE_PRED(0), 1767 WRITE_MASK(0), 1768 OMOD(SQ_ALU_OMOD_OFF), 1769 ALU_INST(SQ_OP2_INST_DOT4), 1770 BANK_SWIZZLE(SQ_ALU_VEC_012), 1771 DST_GPR(3), 1772 DST_REL(ABSOLUTE), 1773 DST_ELEM(ELEM_W), 1774 CLAMP(0)); 1775 1776 /* 22 maskX.x DOT4 - mask */ 1777 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1778 SRC0_REL(ABSOLUTE), 1779 SRC0_ELEM(ELEM_X), 1780 SRC0_NEG(0), 1781 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1782 SRC1_REL(ABSOLUTE), 1783 SRC1_ELEM(ELEM_X), 1784 SRC1_NEG(0), 1785 INDEX_MODE(SQ_INDEX_LOOP), 1786 PRED_SEL(SQ_PRED_SEL_OFF), 1787 LAST(0)); 1788 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1789 SRC1_ABS(0), 1790 UPDATE_EXECUTE_MASK(0), 1791 UPDATE_PRED(0), 1792 WRITE_MASK(1), 1793 OMOD(SQ_ALU_OMOD_OFF), 1794 ALU_INST(SQ_OP2_INST_DOT4), 1795 BANK_SWIZZLE(SQ_ALU_VEC_012), 1796 DST_GPR(4), 1797 DST_REL(ABSOLUTE), 1798 DST_ELEM(ELEM_X), 1799 CLAMP(0)); 1800 1801 /* 23 maskX.y DOT4 - mask */ 1802 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1803 SRC0_REL(ABSOLUTE), 1804 SRC0_ELEM(ELEM_Y), 1805 SRC0_NEG(0), 1806 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1807 SRC1_REL(ABSOLUTE), 1808 SRC1_ELEM(ELEM_Y), 1809 SRC1_NEG(0), 1810 INDEX_MODE(SQ_INDEX_LOOP), 1811 PRED_SEL(SQ_PRED_SEL_OFF), 1812 LAST(0)); 1813 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1814 SRC1_ABS(0), 1815 UPDATE_EXECUTE_MASK(0), 1816 UPDATE_PRED(0), 1817 WRITE_MASK(0), 1818 OMOD(SQ_ALU_OMOD_OFF), 1819 ALU_INST(SQ_OP2_INST_DOT4), 1820 BANK_SWIZZLE(SQ_ALU_VEC_012), 1821 DST_GPR(4), 1822 DST_REL(ABSOLUTE), 1823 DST_ELEM(ELEM_Y), 1824 CLAMP(0)); 1825 1826 /* 24 maskX.z DOT4 - mask */ 1827 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1828 SRC0_REL(ABSOLUTE), 1829 SRC0_ELEM(ELEM_Z), 1830 SRC0_NEG(0), 1831 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1832 SRC1_REL(ABSOLUTE), 1833 SRC1_ELEM(ELEM_Z), 1834 SRC1_NEG(0), 1835 INDEX_MODE(SQ_INDEX_LOOP), 1836 PRED_SEL(SQ_PRED_SEL_OFF), 1837 LAST(0)); 1838 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1839 SRC1_ABS(0), 1840 UPDATE_EXECUTE_MASK(0), 1841 UPDATE_PRED(0), 1842 WRITE_MASK(0), 1843 OMOD(SQ_ALU_OMOD_OFF), 1844 ALU_INST(SQ_OP2_INST_DOT4), 1845 BANK_SWIZZLE(SQ_ALU_VEC_012), 1846 DST_GPR(4), 1847 DST_REL(ABSOLUTE), 1848 DST_ELEM(ELEM_Z), 1849 CLAMP(0)); 1850 1851 /* 25 maskX.w DOT4 - mask */ 1852 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1853 SRC0_REL(ABSOLUTE), 1854 SRC0_ELEM(ELEM_W), 1855 SRC0_NEG(0), 1856 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1857 SRC1_REL(ABSOLUTE), 1858 SRC1_ELEM(ELEM_W), 1859 SRC1_NEG(0), 1860 INDEX_MODE(SQ_INDEX_LOOP), 1861 PRED_SEL(SQ_PRED_SEL_OFF), 1862 LAST(1)); 1863 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1864 SRC1_ABS(0), 1865 UPDATE_EXECUTE_MASK(0), 1866 UPDATE_PRED(0), 1867 WRITE_MASK(0), 1868 OMOD(SQ_ALU_OMOD_OFF), 1869 ALU_INST(SQ_OP2_INST_DOT4), 1870 BANK_SWIZZLE(SQ_ALU_VEC_012), 1871 DST_GPR(4), 1872 DST_REL(ABSOLUTE), 1873 DST_ELEM(ELEM_W), 1874 CLAMP(0)); 1875 1876 /* 26 maskY.x DOT4 - mask */ 1877 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1878 SRC0_REL(ABSOLUTE), 1879 SRC0_ELEM(ELEM_X), 1880 SRC0_NEG(0), 1881 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1882 SRC1_REL(ABSOLUTE), 1883 SRC1_ELEM(ELEM_X), 1884 SRC1_NEG(0), 1885 INDEX_MODE(SQ_INDEX_LOOP), 1886 PRED_SEL(SQ_PRED_SEL_OFF), 1887 LAST(0)); 1888 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1889 SRC1_ABS(0), 1890 UPDATE_EXECUTE_MASK(0), 1891 UPDATE_PRED(0), 1892 WRITE_MASK(0), 1893 OMOD(SQ_ALU_OMOD_OFF), 1894 ALU_INST(SQ_OP2_INST_DOT4), 1895 BANK_SWIZZLE(SQ_ALU_VEC_012), 1896 DST_GPR(4), 1897 DST_REL(ABSOLUTE), 1898 DST_ELEM(ELEM_X), 1899 CLAMP(0)); 1900 1901 /* 27 maskY.y DOT4 - mask */ 1902 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1903 SRC0_REL(ABSOLUTE), 1904 SRC0_ELEM(ELEM_Y), 1905 SRC0_NEG(0), 1906 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1907 SRC1_REL(ABSOLUTE), 1908 SRC1_ELEM(ELEM_Y), 1909 SRC1_NEG(0), 1910 INDEX_MODE(SQ_INDEX_LOOP), 1911 PRED_SEL(SQ_PRED_SEL_OFF), 1912 LAST(0)); 1913 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1914 SRC1_ABS(0), 1915 UPDATE_EXECUTE_MASK(0), 1916 UPDATE_PRED(0), 1917 WRITE_MASK(1), 1918 OMOD(SQ_ALU_OMOD_OFF), 1919 ALU_INST(SQ_OP2_INST_DOT4), 1920 BANK_SWIZZLE(SQ_ALU_VEC_012), 1921 DST_GPR(4), 1922 DST_REL(ABSOLUTE), 1923 DST_ELEM(ELEM_Y), 1924 CLAMP(0)); 1925 1926 /* 28 maskY.z DOT4 - mask */ 1927 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1928 SRC0_REL(ABSOLUTE), 1929 SRC0_ELEM(ELEM_Z), 1930 SRC0_NEG(0), 1931 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1932 SRC1_REL(ABSOLUTE), 1933 SRC1_ELEM(ELEM_Z), 1934 SRC1_NEG(0), 1935 INDEX_MODE(SQ_INDEX_LOOP), 1936 PRED_SEL(SQ_PRED_SEL_OFF), 1937 LAST(0)); 1938 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1939 SRC1_ABS(0), 1940 UPDATE_EXECUTE_MASK(0), 1941 UPDATE_PRED(0), 1942 WRITE_MASK(0), 1943 OMOD(SQ_ALU_OMOD_OFF), 1944 ALU_INST(SQ_OP2_INST_DOT4), 1945 BANK_SWIZZLE(SQ_ALU_VEC_012), 1946 DST_GPR(4), 1947 DST_REL(ABSOLUTE), 1948 DST_ELEM(ELEM_Z), 1949 CLAMP(0)); 1950 1951 /* 29 maskY.w DOT4 - mask */ 1952 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1953 SRC0_REL(ABSOLUTE), 1954 SRC0_ELEM(ELEM_W), 1955 SRC0_NEG(0), 1956 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1957 SRC1_REL(ABSOLUTE), 1958 SRC1_ELEM(ELEM_W), 1959 SRC1_NEG(0), 1960 INDEX_MODE(SQ_INDEX_LOOP), 1961 PRED_SEL(SQ_PRED_SEL_OFF), 1962 LAST(1)); 1963 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1964 SRC1_ABS(0), 1965 UPDATE_EXECUTE_MASK(0), 1966 UPDATE_PRED(0), 1967 WRITE_MASK(0), 1968 OMOD(SQ_ALU_OMOD_OFF), 1969 ALU_INST(SQ_OP2_INST_DOT4), 1970 BANK_SWIZZLE(SQ_ALU_VEC_012), 1971 DST_GPR(4), 1972 DST_REL(ABSOLUTE), 1973 DST_ELEM(ELEM_W), 1974 CLAMP(0)); 1975 1976 /* 30 srcX / w */ 1977 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 1978 SRC0_REL(ABSOLUTE), 1979 SRC0_ELEM(ELEM_X), 1980 SRC0_NEG(0), 1981 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1982 SRC1_REL(ABSOLUTE), 1983 SRC1_ELEM(ELEM_W), 1984 SRC1_NEG(0), 1985 INDEX_MODE(SQ_INDEX_AR_X), 1986 PRED_SEL(SQ_PRED_SEL_OFF), 1987 LAST(1)); 1988 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1989 SRC1_ABS(0), 1990 UPDATE_EXECUTE_MASK(0), 1991 UPDATE_PRED(0), 1992 WRITE_MASK(1), 1993 OMOD(SQ_ALU_OMOD_OFF), 1994 ALU_INST(SQ_OP2_INST_MUL), 1995 BANK_SWIZZLE(SQ_ALU_VEC_012), 1996 DST_GPR(1), 1997 DST_REL(ABSOLUTE), 1998 DST_ELEM(ELEM_X), 1999 CLAMP(0)); 2000 2001 /* 31 srcY / h */ 2002 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 2003 SRC0_REL(ABSOLUTE), 2004 SRC0_ELEM(ELEM_Y), 2005 SRC0_NEG(0), 2006 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2007 SRC1_REL(ABSOLUTE), 2008 SRC1_ELEM(ELEM_W), 2009 SRC1_NEG(0), 2010 INDEX_MODE(SQ_INDEX_AR_X), 2011 PRED_SEL(SQ_PRED_SEL_OFF), 2012 LAST(1)); 2013 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2014 SRC1_ABS(0), 2015 UPDATE_EXECUTE_MASK(0), 2016 UPDATE_PRED(0), 2017 WRITE_MASK(1), 2018 OMOD(SQ_ALU_OMOD_OFF), 2019 ALU_INST(SQ_OP2_INST_MUL), 2020 BANK_SWIZZLE(SQ_ALU_VEC_012), 2021 DST_GPR(1), 2022 DST_REL(ABSOLUTE), 2023 DST_ELEM(ELEM_Y), 2024 CLAMP(0)); 2025 2026 /* 32 maskX / w */ 2027 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 2028 SRC0_REL(ABSOLUTE), 2029 SRC0_ELEM(ELEM_X), 2030 SRC0_NEG(0), 2031 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 2032 SRC1_REL(ABSOLUTE), 2033 SRC1_ELEM(ELEM_W), 2034 SRC1_NEG(0), 2035 INDEX_MODE(SQ_INDEX_AR_X), 2036 PRED_SEL(SQ_PRED_SEL_OFF), 2037 LAST(1)); 2038 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2039 SRC1_ABS(0), 2040 UPDATE_EXECUTE_MASK(0), 2041 UPDATE_PRED(0), 2042 WRITE_MASK(1), 2043 OMOD(SQ_ALU_OMOD_OFF), 2044 ALU_INST(SQ_OP2_INST_MUL), 2045 BANK_SWIZZLE(SQ_ALU_VEC_012), 2046 DST_GPR(0), 2047 DST_REL(ABSOLUTE), 2048 DST_ELEM(ELEM_X), 2049 CLAMP(0)); 2050 2051 /* 33 maskY / h */ 2052 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 2053 SRC0_REL(ABSOLUTE), 2054 SRC0_ELEM(ELEM_Y), 2055 SRC0_NEG(0), 2056 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 2057 SRC1_REL(ABSOLUTE), 2058 SRC1_ELEM(ELEM_W), 2059 SRC1_NEG(0), 2060 INDEX_MODE(SQ_INDEX_AR_X), 2061 PRED_SEL(SQ_PRED_SEL_OFF), 2062 LAST(1)); 2063 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2064 SRC1_ABS(0), 2065 UPDATE_EXECUTE_MASK(0), 2066 UPDATE_PRED(0), 2067 WRITE_MASK(1), 2068 OMOD(SQ_ALU_OMOD_OFF), 2069 ALU_INST(SQ_OP2_INST_MUL), 2070 BANK_SWIZZLE(SQ_ALU_VEC_012), 2071 DST_GPR(0), 2072 DST_REL(ABSOLUTE), 2073 DST_ELEM(ELEM_Y), 2074 CLAMP(0)); 2075 2076 /* 34 srcX.x DOT4 - non-mask */ 2077 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2078 SRC0_REL(ABSOLUTE), 2079 SRC0_ELEM(ELEM_X), 2080 SRC0_NEG(0), 2081 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2082 SRC1_REL(ABSOLUTE), 2083 SRC1_ELEM(ELEM_X), 2084 SRC1_NEG(0), 2085 INDEX_MODE(SQ_INDEX_LOOP), 2086 PRED_SEL(SQ_PRED_SEL_OFF), 2087 LAST(0)); 2088 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2089 SRC1_ABS(0), 2090 UPDATE_EXECUTE_MASK(0), 2091 UPDATE_PRED(0), 2092 WRITE_MASK(1), 2093 OMOD(SQ_ALU_OMOD_OFF), 2094 ALU_INST(SQ_OP2_INST_DOT4), 2095 BANK_SWIZZLE(SQ_ALU_VEC_012), 2096 DST_GPR(2), 2097 DST_REL(ABSOLUTE), 2098 DST_ELEM(ELEM_X), 2099 CLAMP(0)); 2100 2101 /* 35 srcX.y DOT4 - non-mask */ 2102 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2103 SRC0_REL(ABSOLUTE), 2104 SRC0_ELEM(ELEM_Y), 2105 SRC0_NEG(0), 2106 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2107 SRC1_REL(ABSOLUTE), 2108 SRC1_ELEM(ELEM_Y), 2109 SRC1_NEG(0), 2110 INDEX_MODE(SQ_INDEX_LOOP), 2111 PRED_SEL(SQ_PRED_SEL_OFF), 2112 LAST(0)); 2113 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2114 SRC1_ABS(0), 2115 UPDATE_EXECUTE_MASK(0), 2116 UPDATE_PRED(0), 2117 WRITE_MASK(0), 2118 OMOD(SQ_ALU_OMOD_OFF), 2119 ALU_INST(SQ_OP2_INST_DOT4), 2120 BANK_SWIZZLE(SQ_ALU_VEC_012), 2121 DST_GPR(2), 2122 DST_REL(ABSOLUTE), 2123 DST_ELEM(ELEM_Y), 2124 CLAMP(0)); 2125 2126 /* 36 srcX.z DOT4 - non-mask */ 2127 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2128 SRC0_REL(ABSOLUTE), 2129 SRC0_ELEM(ELEM_Z), 2130 SRC0_NEG(0), 2131 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2132 SRC1_REL(ABSOLUTE), 2133 SRC1_ELEM(ELEM_Z), 2134 SRC1_NEG(0), 2135 INDEX_MODE(SQ_INDEX_LOOP), 2136 PRED_SEL(SQ_PRED_SEL_OFF), 2137 LAST(0)); 2138 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2139 SRC1_ABS(0), 2140 UPDATE_EXECUTE_MASK(0), 2141 UPDATE_PRED(0), 2142 WRITE_MASK(0), 2143 OMOD(SQ_ALU_OMOD_OFF), 2144 ALU_INST(SQ_OP2_INST_DOT4), 2145 BANK_SWIZZLE(SQ_ALU_VEC_012), 2146 DST_GPR(2), 2147 DST_REL(ABSOLUTE), 2148 DST_ELEM(ELEM_Z), 2149 CLAMP(0)); 2150 2151 /* 37 srcX.w DOT4 - non-mask */ 2152 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2153 SRC0_REL(ABSOLUTE), 2154 SRC0_ELEM(ELEM_W), 2155 SRC0_NEG(0), 2156 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2157 SRC1_REL(ABSOLUTE), 2158 SRC1_ELEM(ELEM_W), 2159 SRC1_NEG(0), 2160 INDEX_MODE(SQ_INDEX_LOOP), 2161 PRED_SEL(SQ_PRED_SEL_OFF), 2162 LAST(1)); 2163 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2164 SRC1_ABS(0), 2165 UPDATE_EXECUTE_MASK(0), 2166 UPDATE_PRED(0), 2167 WRITE_MASK(0), 2168 OMOD(SQ_ALU_OMOD_OFF), 2169 ALU_INST(SQ_OP2_INST_DOT4), 2170 BANK_SWIZZLE(SQ_ALU_VEC_012), 2171 DST_GPR(2), 2172 DST_REL(ABSOLUTE), 2173 DST_ELEM(ELEM_W), 2174 CLAMP(0)); 2175 2176 /* 38 srcY.x DOT4 - non-mask */ 2177 
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2178 SRC0_REL(ABSOLUTE), 2179 SRC0_ELEM(ELEM_X), 2180 SRC0_NEG(0), 2181 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2182 SRC1_REL(ABSOLUTE), 2183 SRC1_ELEM(ELEM_X), 2184 SRC1_NEG(0), 2185 INDEX_MODE(SQ_INDEX_LOOP), 2186 PRED_SEL(SQ_PRED_SEL_OFF), 2187 LAST(0)); 2188 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2189 SRC1_ABS(0), 2190 UPDATE_EXECUTE_MASK(0), 2191 UPDATE_PRED(0), 2192 WRITE_MASK(0), 2193 OMOD(SQ_ALU_OMOD_OFF), 2194 ALU_INST(SQ_OP2_INST_DOT4), 2195 BANK_SWIZZLE(SQ_ALU_VEC_012), 2196 DST_GPR(2), 2197 DST_REL(ABSOLUTE), 2198 DST_ELEM(ELEM_X), 2199 CLAMP(0)); 2200 2201 /* 39 srcY.y DOT4 - non-mask */ 2202 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2203 SRC0_REL(ABSOLUTE), 2204 SRC0_ELEM(ELEM_Y), 2205 SRC0_NEG(0), 2206 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2207 SRC1_REL(ABSOLUTE), 2208 SRC1_ELEM(ELEM_Y), 2209 SRC1_NEG(0), 2210 INDEX_MODE(SQ_INDEX_LOOP), 2211 PRED_SEL(SQ_PRED_SEL_OFF), 2212 LAST(0)); 2213 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2214 SRC1_ABS(0), 2215 UPDATE_EXECUTE_MASK(0), 2216 UPDATE_PRED(0), 2217 WRITE_MASK(1), 2218 OMOD(SQ_ALU_OMOD_OFF), 2219 ALU_INST(SQ_OP2_INST_DOT4), 2220 BANK_SWIZZLE(SQ_ALU_VEC_012), 2221 DST_GPR(2), 2222 DST_REL(ABSOLUTE), 2223 DST_ELEM(ELEM_Y), 2224 CLAMP(0)); 2225 2226 /* 40 srcY.z DOT4 - non-mask */ 2227 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2228 SRC0_REL(ABSOLUTE), 2229 SRC0_ELEM(ELEM_Z), 2230 SRC0_NEG(0), 2231 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2232 SRC1_REL(ABSOLUTE), 2233 SRC1_ELEM(ELEM_Z), 2234 SRC1_NEG(0), 2235 INDEX_MODE(SQ_INDEX_LOOP), 2236 PRED_SEL(SQ_PRED_SEL_OFF), 2237 LAST(0)); 2238 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2239 SRC1_ABS(0), 2240 UPDATE_EXECUTE_MASK(0), 2241 UPDATE_PRED(0), 2242 WRITE_MASK(0), 2243 OMOD(SQ_ALU_OMOD_OFF), 2244 ALU_INST(SQ_OP2_INST_DOT4), 2245 BANK_SWIZZLE(SQ_ALU_VEC_012), 2246 DST_GPR(2), 2247 DST_REL(ABSOLUTE), 2248 DST_ELEM(ELEM_Z), 2249 CLAMP(0)); 2250 2251 /* 41 srcY.w DOT4 - non-mask 
*/ 2252 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2253 SRC0_REL(ABSOLUTE), 2254 SRC0_ELEM(ELEM_W), 2255 SRC0_NEG(0), 2256 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2257 SRC1_REL(ABSOLUTE), 2258 SRC1_ELEM(ELEM_W), 2259 SRC1_NEG(0), 2260 INDEX_MODE(SQ_INDEX_LOOP), 2261 PRED_SEL(SQ_PRED_SEL_OFF), 2262 LAST(1)); 2263 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2264 SRC1_ABS(0), 2265 UPDATE_EXECUTE_MASK(0), 2266 UPDATE_PRED(0), 2267 WRITE_MASK(0), 2268 OMOD(SQ_ALU_OMOD_OFF), 2269 ALU_INST(SQ_OP2_INST_DOT4), 2270 BANK_SWIZZLE(SQ_ALU_VEC_012), 2271 DST_GPR(2), 2272 DST_REL(ABSOLUTE), 2273 DST_ELEM(ELEM_W), 2274 CLAMP(0)); 2275 2276 /* 42 srcX / w */ 2277 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2278 SRC0_REL(ABSOLUTE), 2279 SRC0_ELEM(ELEM_X), 2280 SRC0_NEG(0), 2281 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2282 SRC1_REL(ABSOLUTE), 2283 SRC1_ELEM(ELEM_W), 2284 SRC1_NEG(0), 2285 INDEX_MODE(SQ_INDEX_AR_X), 2286 PRED_SEL(SQ_PRED_SEL_OFF), 2287 LAST(1)); 2288 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2289 SRC1_ABS(0), 2290 UPDATE_EXECUTE_MASK(0), 2291 UPDATE_PRED(0), 2292 WRITE_MASK(1), 2293 OMOD(SQ_ALU_OMOD_OFF), 2294 ALU_INST(SQ_OP2_INST_MUL), 2295 BANK_SWIZZLE(SQ_ALU_VEC_012), 2296 DST_GPR(0), 2297 DST_REL(ABSOLUTE), 2298 DST_ELEM(ELEM_X), 2299 CLAMP(0)); 2300 2301 /* 43 srcY / h */ 2302 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2303 SRC0_REL(ABSOLUTE), 2304 SRC0_ELEM(ELEM_Y), 2305 SRC0_NEG(0), 2306 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2307 SRC1_REL(ABSOLUTE), 2308 SRC1_ELEM(ELEM_W), 2309 SRC1_NEG(0), 2310 INDEX_MODE(SQ_INDEX_AR_X), 2311 PRED_SEL(SQ_PRED_SEL_OFF), 2312 LAST(1)); 2313 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2314 SRC1_ABS(0), 2315 UPDATE_EXECUTE_MASK(0), 2316 UPDATE_PRED(0), 2317 WRITE_MASK(1), 2318 OMOD(SQ_ALU_OMOD_OFF), 2319 ALU_INST(SQ_OP2_INST_MUL), 2320 BANK_SWIZZLE(SQ_ALU_VEC_012), 2321 DST_GPR(0), 2322 DST_REL(ABSOLUTE), 2323 DST_ELEM(ELEM_Y), 2324 CLAMP(0)); 2325 2326 /* mask vfetch - 44/45 - dst */ 2327 shader[i++] = 
VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2328 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2329 FETCH_WHOLE_QUAD(0), 2330 BUFFER_ID(0), 2331 SRC_GPR(0), 2332 SRC_REL(ABSOLUTE), 2333 SRC_SEL_X(SQ_SEL_X), 2334 MEGA_FETCH_COUNT(24)); 2335 shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), 2336 DST_REL(0), 2337 DST_SEL_X(SQ_SEL_X), 2338 DST_SEL_Y(SQ_SEL_Y), 2339 DST_SEL_Z(SQ_SEL_0), 2340 DST_SEL_W(SQ_SEL_1), 2341 USE_CONST_FIELDS(0), 2342 DATA_FORMAT(FMT_32_32_FLOAT), 2343 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2344 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2345 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2346 shader[i++] = VTX_DWORD2(OFFSET(0), 2347 ENDIAN_SWAP(ENDIAN_NONE), 2348 CONST_BUF_NO_STRIDE(0), 2349 MEGA_FETCH(1), 2350 ALT_CONST(0), 2351 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2352 shader[i++] = VTX_DWORD_PAD; 2353 /* 46/47 - src */ 2354 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2355 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2356 FETCH_WHOLE_QUAD(0), 2357 BUFFER_ID(0), 2358 SRC_GPR(0), 2359 SRC_REL(ABSOLUTE), 2360 SRC_SEL_X(SQ_SEL_X), 2361 MEGA_FETCH_COUNT(8)); 2362 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2363 DST_REL(0), 2364 DST_SEL_X(SQ_SEL_X), 2365 DST_SEL_Y(SQ_SEL_Y), 2366 DST_SEL_Z(SQ_SEL_1), 2367 DST_SEL_W(SQ_SEL_0), 2368 USE_CONST_FIELDS(0), 2369 DATA_FORMAT(FMT_32_32_FLOAT), 2370 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2371 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2372 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2373 shader[i++] = VTX_DWORD2(OFFSET(8), 2374 ENDIAN_SWAP(ENDIAN_NONE), 2375 CONST_BUF_NO_STRIDE(0), 2376 MEGA_FETCH(0), 2377 ALT_CONST(0), 2378 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2379 shader[i++] = VTX_DWORD_PAD; 2380 /* 48/49 - mask */ 2381 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2382 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2383 FETCH_WHOLE_QUAD(0), 2384 BUFFER_ID(0), 2385 SRC_GPR(0), 2386 SRC_REL(ABSOLUTE), 2387 SRC_SEL_X(SQ_SEL_X), 2388 MEGA_FETCH_COUNT(8)); 2389 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2390 DST_REL(0), 2391 
DST_SEL_X(SQ_SEL_X), 2392 DST_SEL_Y(SQ_SEL_Y), 2393 DST_SEL_Z(SQ_SEL_1), 2394 DST_SEL_W(SQ_SEL_0), 2395 USE_CONST_FIELDS(0), 2396 DATA_FORMAT(FMT_32_32_FLOAT), 2397 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2398 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2399 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2400 shader[i++] = VTX_DWORD2(OFFSET(16), 2401 ENDIAN_SWAP(ENDIAN_NONE), 2402 CONST_BUF_NO_STRIDE(0), 2403 MEGA_FETCH(0), 2404 ALT_CONST(0), 2405 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2406 shader[i++] = VTX_DWORD_PAD; 2407 2408 /* no mask vfetch - 50/51 - dst */ 2409 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2410 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2411 FETCH_WHOLE_QUAD(0), 2412 BUFFER_ID(0), 2413 SRC_GPR(0), 2414 SRC_REL(ABSOLUTE), 2415 SRC_SEL_X(SQ_SEL_X), 2416 MEGA_FETCH_COUNT(16)); 2417 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2418 DST_REL(0), 2419 DST_SEL_X(SQ_SEL_X), 2420 DST_SEL_Y(SQ_SEL_Y), 2421 DST_SEL_Z(SQ_SEL_0), 2422 DST_SEL_W(SQ_SEL_1), 2423 USE_CONST_FIELDS(0), 2424 DATA_FORMAT(FMT_32_32_FLOAT), 2425 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2426 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2427 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2428 shader[i++] = VTX_DWORD2(OFFSET(0), 2429 ENDIAN_SWAP(ENDIAN_NONE), 2430 CONST_BUF_NO_STRIDE(0), 2431 MEGA_FETCH(1), 2432 ALT_CONST(0), 2433 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2434 shader[i++] = VTX_DWORD_PAD; 2435 /* 52/53 - src */ 2436 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2437 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2438 FETCH_WHOLE_QUAD(0), 2439 BUFFER_ID(0), 2440 SRC_GPR(0), 2441 SRC_REL(ABSOLUTE), 2442 SRC_SEL_X(SQ_SEL_X), 2443 MEGA_FETCH_COUNT(8)); 2444 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2445 DST_REL(0), 2446 DST_SEL_X(SQ_SEL_X), 2447 DST_SEL_Y(SQ_SEL_Y), 2448 DST_SEL_Z(SQ_SEL_1), 2449 DST_SEL_W(SQ_SEL_0), 2450 USE_CONST_FIELDS(0), 2451 DATA_FORMAT(FMT_32_32_FLOAT), 2452 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2453 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2454 
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2455 shader[i++] = VTX_DWORD2(OFFSET(8), 2456 ENDIAN_SWAP(ENDIAN_NONE), 2457 CONST_BUF_NO_STRIDE(0), 2458 MEGA_FETCH(0), 2459 ALT_CONST(0), 2460 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2461 shader[i++] = VTX_DWORD_PAD; 2462 2463 return i; 2464} 2465 2466/* comp ps --------------------------------------- */ 2467int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) 2468{ 2469 int i = 0; 2470 2471 /* 0 */ 2472 shader[i++] = CF_DWORD0(ADDR(3), 2473 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2474 shader[i++] = CF_DWORD1(POP_COUNT(0), 2475 CF_CONST(0), 2476 COND(SQ_CF_COND_BOOL), 2477 I_COUNT(0), 2478 VALID_PIXEL_MODE(0), 2479 END_OF_PROGRAM(0), 2480 CF_INST(SQ_CF_INST_CALL), 2481 WHOLE_QUAD_MODE(0), 2482 BARRIER(0)); 2483 /* 1 */ 2484 shader[i++] = CF_DWORD0(ADDR(8), 2485 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2486 shader[i++] = CF_DWORD1(POP_COUNT(0), 2487 CF_CONST(0), 2488 COND(SQ_CF_COND_NOT_BOOL), 2489 I_COUNT(0), 2490 VALID_PIXEL_MODE(0), 2491 END_OF_PROGRAM(0), 2492 CF_INST(SQ_CF_INST_CALL), 2493 WHOLE_QUAD_MODE(0), 2494 BARRIER(0)); 2495 /* 2 */ 2496 shader[i++] = CF_DWORD0(ADDR(0), 2497 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2498 shader[i++] = CF_DWORD1(POP_COUNT(0), 2499 CF_CONST(0), 2500 COND(SQ_CF_COND_ACTIVE), 2501 I_COUNT(0), 2502 VALID_PIXEL_MODE(0), 2503 END_OF_PROGRAM(1), 2504 CF_INST(SQ_CF_INST_NOP), 2505 WHOLE_QUAD_MODE(0), 2506 BARRIER(1)); 2507 2508 /* 3 - mask sub */ 2509 shader[i++] = CF_ALU_DWORD0(ADDR(12), 2510 KCACHE_BANK0(0), 2511 KCACHE_BANK1(0), 2512 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2513 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2514 KCACHE_ADDR0(0), 2515 KCACHE_ADDR1(0), 2516 I_COUNT(8), 2517 ALT_CONST(0), 2518 CF_INST(SQ_CF_INST_ALU), 2519 WHOLE_QUAD_MODE(0), 2520 BARRIER(1)); 2521 2522 /* 4 */ 2523 shader[i++] = CF_DWORD0(ADDR(28), 2524 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2525 shader[i++] = CF_DWORD1(POP_COUNT(0), 2526 CF_CONST(0), 2527 
COND(SQ_CF_COND_ACTIVE), 2528 I_COUNT(2), 2529 VALID_PIXEL_MODE(0), 2530 END_OF_PROGRAM(0), 2531 CF_INST(SQ_CF_INST_TC), 2532 WHOLE_QUAD_MODE(0), 2533 BARRIER(1)); 2534 2535 /* 5 */ 2536 shader[i++] = CF_ALU_DWORD0(ADDR(20), 2537 KCACHE_BANK0(0), 2538 KCACHE_BANK1(0), 2539 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 2540 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2541 KCACHE_ADDR0(0), 2542 KCACHE_ADDR1(0), 2543 I_COUNT(4), 2544 ALT_CONST(0), 2545 CF_INST(SQ_CF_INST_ALU), 2546 WHOLE_QUAD_MODE(0), 2547 BARRIER(1)); 2548 2549 /* 6 */ 2550 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2551 TYPE(SQ_EXPORT_PIXEL), 2552 RW_GPR(2), 2553 RW_REL(ABSOLUTE), 2554 INDEX_GPR(0), 2555 ELEM_SIZE(1)); 2556 2557 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2558 SRC_SEL_Y(SQ_SEL_Y), 2559 SRC_SEL_Z(SQ_SEL_Z), 2560 SRC_SEL_W(SQ_SEL_W), 2561 BURST_COUNT(1), 2562 VALID_PIXEL_MODE(0), 2563 END_OF_PROGRAM(0), 2564 CF_INST(SQ_CF_INST_EXPORT_DONE), 2565 MARK(0), 2566 BARRIER(1)); 2567 /* 7 */ 2568 shader[i++] = CF_DWORD0(ADDR(0), 2569 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2570 shader[i++] = CF_DWORD1(POP_COUNT(0), 2571 CF_CONST(0), 2572 COND(SQ_CF_COND_ACTIVE), 2573 I_COUNT(0), 2574 VALID_PIXEL_MODE(0), 2575 END_OF_PROGRAM(0), 2576 CF_INST(SQ_CF_INST_RETURN), 2577 WHOLE_QUAD_MODE(0), 2578 BARRIER(1)); 2579 2580 /* 8 - non-mask sub */ 2581 shader[i++] = CF_ALU_DWORD0(ADDR(24), 2582 KCACHE_BANK0(0), 2583 KCACHE_BANK1(0), 2584 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2585 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2586 KCACHE_ADDR0(0), 2587 KCACHE_ADDR1(0), 2588 I_COUNT(4), 2589 ALT_CONST(0), 2590 CF_INST(SQ_CF_INST_ALU), 2591 WHOLE_QUAD_MODE(0), 2592 BARRIER(1)); 2593 /* 9 */ 2594 shader[i++] = CF_DWORD0(ADDR(32), 2595 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2596 shader[i++] = CF_DWORD1(POP_COUNT(0), 2597 CF_CONST(0), 2598 COND(SQ_CF_COND_ACTIVE), 2599 I_COUNT(1), 2600 VALID_PIXEL_MODE(0), 2601 END_OF_PROGRAM(0), 2602 
CF_INST(SQ_CF_INST_TC), 2603 WHOLE_QUAD_MODE(0), 2604 BARRIER(1)); 2605 2606 /* 10 */ 2607 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2608 TYPE(SQ_EXPORT_PIXEL), 2609 RW_GPR(0), 2610 RW_REL(ABSOLUTE), 2611 INDEX_GPR(0), 2612 ELEM_SIZE(1)); 2613 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2614 SRC_SEL_Y(SQ_SEL_Y), 2615 SRC_SEL_Z(SQ_SEL_Z), 2616 SRC_SEL_W(SQ_SEL_W), 2617 BURST_COUNT(1), 2618 VALID_PIXEL_MODE(0), 2619 END_OF_PROGRAM(0), 2620 CF_INST(SQ_CF_INST_EXPORT_DONE), 2621 MARK(0), 2622 BARRIER(1)); 2623 2624 /* 11 */ 2625 shader[i++] = CF_DWORD0(ADDR(0), 2626 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2627 shader[i++] = CF_DWORD1(POP_COUNT(0), 2628 CF_CONST(0), 2629 COND(SQ_CF_COND_ACTIVE), 2630 I_COUNT(0), 2631 VALID_PIXEL_MODE(0), 2632 END_OF_PROGRAM(0), 2633 CF_INST(SQ_CF_INST_RETURN), 2634 WHOLE_QUAD_MODE(0), 2635 BARRIER(1)); 2636 2637 /* 12 interpolate src tex coords - mask */ 2638 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2639 SRC0_REL(ABSOLUTE), 2640 SRC0_ELEM(ELEM_Y), 2641 SRC0_NEG(0), 2642 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2643 SRC1_REL(ABSOLUTE), 2644 SRC1_ELEM(ELEM_X), 2645 SRC1_NEG(0), 2646 INDEX_MODE(SQ_INDEX_AR_X), 2647 PRED_SEL(SQ_PRED_SEL_OFF), 2648 LAST(0)); 2649 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2650 SRC1_ABS(0), 2651 UPDATE_EXECUTE_MASK(0), 2652 UPDATE_PRED(0), 2653 WRITE_MASK(1), 2654 OMOD(SQ_ALU_OMOD_OFF), 2655 ALU_INST(SQ_OP2_INST_INTERP_XY), 2656 BANK_SWIZZLE(SQ_ALU_VEC_210), 2657 DST_GPR(1), 2658 DST_REL(ABSOLUTE), 2659 DST_ELEM(ELEM_X), 2660 CLAMP(0)); 2661 /* 13 */ 2662 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2663 SRC0_REL(ABSOLUTE), 2664 SRC0_ELEM(ELEM_X), 2665 SRC0_NEG(0), 2666 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2667 SRC1_REL(ABSOLUTE), 2668 SRC1_ELEM(ELEM_X), 2669 SRC1_NEG(0), 2670 INDEX_MODE(SQ_INDEX_AR_X), 2671 PRED_SEL(SQ_PRED_SEL_OFF), 2672 LAST(0)); 2673 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2674 SRC1_ABS(0), 2675 UPDATE_EXECUTE_MASK(0), 
2676 UPDATE_PRED(0), 2677 WRITE_MASK(1), 2678 OMOD(SQ_ALU_OMOD_OFF), 2679 ALU_INST(SQ_OP2_INST_INTERP_XY), 2680 BANK_SWIZZLE(SQ_ALU_VEC_210), 2681 DST_GPR(1), 2682 DST_REL(ABSOLUTE), 2683 DST_ELEM(ELEM_Y), 2684 CLAMP(0)); 2685 /* 14 */ 2686 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2687 SRC0_REL(ABSOLUTE), 2688 SRC0_ELEM(ELEM_Y), 2689 SRC0_NEG(0), 2690 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2691 SRC1_REL(ABSOLUTE), 2692 SRC1_ELEM(ELEM_X), 2693 SRC1_NEG(0), 2694 INDEX_MODE(SQ_INDEX_AR_X), 2695 PRED_SEL(SQ_PRED_SEL_OFF), 2696 LAST(0)); 2697 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2698 SRC1_ABS(0), 2699 UPDATE_EXECUTE_MASK(0), 2700 UPDATE_PRED(0), 2701 WRITE_MASK(0), 2702 OMOD(SQ_ALU_OMOD_OFF), 2703 ALU_INST(SQ_OP2_INST_INTERP_XY), 2704 BANK_SWIZZLE(SQ_ALU_VEC_210), 2705 DST_GPR(1), 2706 DST_REL(ABSOLUTE), 2707 DST_ELEM(ELEM_Z), 2708 CLAMP(0)); 2709 /* 15 */ 2710 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2711 SRC0_REL(ABSOLUTE), 2712 SRC0_ELEM(ELEM_X), 2713 SRC0_NEG(0), 2714 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2715 SRC1_REL(ABSOLUTE), 2716 SRC1_ELEM(ELEM_X), 2717 SRC1_NEG(0), 2718 INDEX_MODE(SQ_INDEX_AR_X), 2719 PRED_SEL(SQ_PRED_SEL_OFF), 2720 LAST(1)); 2721 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2722 SRC1_ABS(0), 2723 UPDATE_EXECUTE_MASK(0), 2724 UPDATE_PRED(0), 2725 WRITE_MASK(0), 2726 OMOD(SQ_ALU_OMOD_OFF), 2727 ALU_INST(SQ_OP2_INST_INTERP_XY), 2728 BANK_SWIZZLE(SQ_ALU_VEC_210), 2729 DST_GPR(1), 2730 DST_REL(ABSOLUTE), 2731 DST_ELEM(ELEM_W), 2732 CLAMP(0)); 2733 2734 /* 16 interpolate mask tex coords */ 2735 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2736 SRC0_REL(ABSOLUTE), 2737 SRC0_ELEM(ELEM_Y), 2738 SRC0_NEG(0), 2739 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2740 SRC1_REL(ABSOLUTE), 2741 SRC1_ELEM(ELEM_X), 2742 SRC1_NEG(0), 2743 INDEX_MODE(SQ_INDEX_AR_X), 2744 PRED_SEL(SQ_PRED_SEL_OFF), 2745 LAST(0)); 2746 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2747 SRC1_ABS(0), 2748 UPDATE_EXECUTE_MASK(0), 2749 UPDATE_PRED(0), 2750 
WRITE_MASK(1), 2751 OMOD(SQ_ALU_OMOD_OFF), 2752 ALU_INST(SQ_OP2_INST_INTERP_XY), 2753 BANK_SWIZZLE(SQ_ALU_VEC_210), 2754 DST_GPR(0), 2755 DST_REL(ABSOLUTE), 2756 DST_ELEM(ELEM_X), 2757 CLAMP(0)); 2758 /* 17 */ 2759 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2760 SRC0_REL(ABSOLUTE), 2761 SRC0_ELEM(ELEM_X), 2762 SRC0_NEG(0), 2763 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2764 SRC1_REL(ABSOLUTE), 2765 SRC1_ELEM(ELEM_X), 2766 SRC1_NEG(0), 2767 INDEX_MODE(SQ_INDEX_AR_X), 2768 PRED_SEL(SQ_PRED_SEL_OFF), 2769 LAST(0)); 2770 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2771 SRC1_ABS(0), 2772 UPDATE_EXECUTE_MASK(0), 2773 UPDATE_PRED(0), 2774 WRITE_MASK(1), 2775 OMOD(SQ_ALU_OMOD_OFF), 2776 ALU_INST(SQ_OP2_INST_INTERP_XY), 2777 BANK_SWIZZLE(SQ_ALU_VEC_210), 2778 DST_GPR(0), 2779 DST_REL(ABSOLUTE), 2780 DST_ELEM(ELEM_Y), 2781 CLAMP(0)); 2782 /* 18 */ 2783 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2784 SRC0_REL(ABSOLUTE), 2785 SRC0_ELEM(ELEM_Y), 2786 SRC0_NEG(0), 2787 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2788 SRC1_REL(ABSOLUTE), 2789 SRC1_ELEM(ELEM_X), 2790 SRC1_NEG(0), 2791 INDEX_MODE(SQ_INDEX_AR_X), 2792 PRED_SEL(SQ_PRED_SEL_OFF), 2793 LAST(0)); 2794 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2795 SRC1_ABS(0), 2796 UPDATE_EXECUTE_MASK(0), 2797 UPDATE_PRED(0), 2798 WRITE_MASK(0), 2799 OMOD(SQ_ALU_OMOD_OFF), 2800 ALU_INST(SQ_OP2_INST_INTERP_XY), 2801 BANK_SWIZZLE(SQ_ALU_VEC_210), 2802 DST_GPR(0), 2803 DST_REL(ABSOLUTE), 2804 DST_ELEM(ELEM_Z), 2805 CLAMP(0)); 2806 /* 19 */ 2807 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2808 SRC0_REL(ABSOLUTE), 2809 SRC0_ELEM(ELEM_X), 2810 SRC0_NEG(0), 2811 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2812 SRC1_REL(ABSOLUTE), 2813 SRC1_ELEM(ELEM_X), 2814 SRC1_NEG(0), 2815 INDEX_MODE(SQ_INDEX_AR_X), 2816 PRED_SEL(SQ_PRED_SEL_OFF), 2817 LAST(1)); 2818 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2819 SRC1_ABS(0), 2820 UPDATE_EXECUTE_MASK(0), 2821 UPDATE_PRED(0), 2822 WRITE_MASK(0), 2823 OMOD(SQ_ALU_OMOD_OFF), 2824 
ALU_INST(SQ_OP2_INST_INTERP_XY), 2825 BANK_SWIZZLE(SQ_ALU_VEC_210), 2826 DST_GPR(0), 2827 DST_REL(ABSOLUTE), 2828 DST_ELEM(ELEM_W), 2829 CLAMP(0)); 2830 2831 /* 20 - alu 0 */ 2832 /* MUL gpr[2].x gpr[0].x gpr[1].x */ 2833 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2834 SRC0_REL(ABSOLUTE), 2835 SRC0_ELEM(ELEM_X), 2836 SRC0_NEG(0), 2837 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2838 SRC1_REL(ABSOLUTE), 2839 SRC1_ELEM(ELEM_X), 2840 SRC1_NEG(0), 2841 INDEX_MODE(SQ_INDEX_LOOP), 2842 PRED_SEL(SQ_PRED_SEL_OFF), 2843 LAST(0)); 2844 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2845 SRC1_ABS(0), 2846 UPDATE_EXECUTE_MASK(0), 2847 UPDATE_PRED(0), 2848 WRITE_MASK(1), 2849 OMOD(SQ_ALU_OMOD_OFF), 2850 ALU_INST(SQ_OP2_INST_MUL), 2851 BANK_SWIZZLE(SQ_ALU_VEC_012), 2852 DST_GPR(2), 2853 DST_REL(ABSOLUTE), 2854 DST_ELEM(ELEM_X), 2855 CLAMP(1)); 2856 /* 21 - alu 1 */ 2857 /* MUL gpr[2].y gpr[0].y gpr[1].y */ 2858 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2859 SRC0_REL(ABSOLUTE), 2860 SRC0_ELEM(ELEM_Y), 2861 SRC0_NEG(0), 2862 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2863 SRC1_REL(ABSOLUTE), 2864 SRC1_ELEM(ELEM_Y), 2865 SRC1_NEG(0), 2866 INDEX_MODE(SQ_INDEX_LOOP), 2867 PRED_SEL(SQ_PRED_SEL_OFF), 2868 LAST(0)); 2869 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2870 SRC1_ABS(0), 2871 UPDATE_EXECUTE_MASK(0), 2872 UPDATE_PRED(0), 2873 WRITE_MASK(1), 2874 OMOD(SQ_ALU_OMOD_OFF), 2875 ALU_INST(SQ_OP2_INST_MUL), 2876 BANK_SWIZZLE(SQ_ALU_VEC_012), 2877 DST_GPR(2), 2878 DST_REL(ABSOLUTE), 2879 DST_ELEM(ELEM_Y), 2880 CLAMP(1)); 2881 /* 22 - alu 2 */ 2882 /* MUL gpr[2].z gpr[0].z gpr[1].z */ 2883 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2884 SRC0_REL(ABSOLUTE), 2885 SRC0_ELEM(ELEM_Z), 2886 SRC0_NEG(0), 2887 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2888 SRC1_REL(ABSOLUTE), 2889 SRC1_ELEM(ELEM_Z), 2890 SRC1_NEG(0), 2891 INDEX_MODE(SQ_INDEX_LOOP), 2892 PRED_SEL(SQ_PRED_SEL_OFF), 2893 LAST(0)); 2894 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2895 SRC1_ABS(0), 2896 UPDATE_EXECUTE_MASK(0), 2897 
UPDATE_PRED(0), 2898 WRITE_MASK(1), 2899 OMOD(SQ_ALU_OMOD_OFF), 2900 ALU_INST(SQ_OP2_INST_MUL), 2901 BANK_SWIZZLE(SQ_ALU_VEC_012), 2902 DST_GPR(2), 2903 DST_REL(ABSOLUTE), 2904 DST_ELEM(ELEM_Z), 2905 CLAMP(1)); 2906 /* 23 - alu 3 */ 2907 /* MUL gpr[2].w gpr[0].w gpr[1].w */ 2908 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2909 SRC0_REL(ABSOLUTE), 2910 SRC0_ELEM(ELEM_W), 2911 SRC0_NEG(0), 2912 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2913 SRC1_REL(ABSOLUTE), 2914 SRC1_ELEM(ELEM_W), 2915 SRC1_NEG(0), 2916 INDEX_MODE(SQ_INDEX_LOOP), 2917 PRED_SEL(SQ_PRED_SEL_OFF), 2918 LAST(1)); 2919 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2920 SRC1_ABS(0), 2921 UPDATE_EXECUTE_MASK(0), 2922 UPDATE_PRED(0), 2923 WRITE_MASK(1), 2924 OMOD(SQ_ALU_OMOD_OFF), 2925 ALU_INST(SQ_OP2_INST_MUL), 2926 BANK_SWIZZLE(SQ_ALU_VEC_012), 2927 DST_GPR(2), 2928 DST_REL(ABSOLUTE), 2929 DST_ELEM(ELEM_W), 2930 CLAMP(1)); 2931 2932 /* 24 - interpolate tex coords - non-mask */ 2933 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2934 SRC0_REL(ABSOLUTE), 2935 SRC0_ELEM(ELEM_Y), 2936 SRC0_NEG(0), 2937 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2938 SRC1_REL(ABSOLUTE), 2939 SRC1_ELEM(ELEM_X), 2940 SRC1_NEG(0), 2941 INDEX_MODE(SQ_INDEX_AR_X), 2942 PRED_SEL(SQ_PRED_SEL_OFF), 2943 LAST(0)); 2944 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2945 SRC1_ABS(0), 2946 UPDATE_EXECUTE_MASK(0), 2947 UPDATE_PRED(0), 2948 WRITE_MASK(1), 2949 OMOD(SQ_ALU_OMOD_OFF), 2950 ALU_INST(SQ_OP2_INST_INTERP_XY), 2951 BANK_SWIZZLE(SQ_ALU_VEC_210), 2952 DST_GPR(0), 2953 DST_REL(ABSOLUTE), 2954 DST_ELEM(ELEM_X), 2955 CLAMP(0)); 2956 /* 25 */ 2957 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2958 SRC0_REL(ABSOLUTE), 2959 SRC0_ELEM(ELEM_X), 2960 SRC0_NEG(0), 2961 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2962 SRC1_REL(ABSOLUTE), 2963 SRC1_ELEM(ELEM_X), 2964 SRC1_NEG(0), 2965 INDEX_MODE(SQ_INDEX_AR_X), 2966 PRED_SEL(SQ_PRED_SEL_OFF), 2967 LAST(0)); 2968 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2969 SRC1_ABS(0), 2970 
UPDATE_EXECUTE_MASK(0), 2971 UPDATE_PRED(0), 2972 WRITE_MASK(1), 2973 OMOD(SQ_ALU_OMOD_OFF), 2974 ALU_INST(SQ_OP2_INST_INTERP_XY), 2975 BANK_SWIZZLE(SQ_ALU_VEC_210), 2976 DST_GPR(0), 2977 DST_REL(ABSOLUTE), 2978 DST_ELEM(ELEM_Y), 2979 CLAMP(0)); 2980 /* 26 */ 2981 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2982 SRC0_REL(ABSOLUTE), 2983 SRC0_ELEM(ELEM_Y), 2984 SRC0_NEG(0), 2985 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2986 SRC1_REL(ABSOLUTE), 2987 SRC1_ELEM(ELEM_X), 2988 SRC1_NEG(0), 2989 INDEX_MODE(SQ_INDEX_AR_X), 2990 PRED_SEL(SQ_PRED_SEL_OFF), 2991 LAST(0)); 2992 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2993 SRC1_ABS(0), 2994 UPDATE_EXECUTE_MASK(0), 2995 UPDATE_PRED(0), 2996 WRITE_MASK(0), 2997 OMOD(SQ_ALU_OMOD_OFF), 2998 ALU_INST(SQ_OP2_INST_INTERP_XY), 2999 BANK_SWIZZLE(SQ_ALU_VEC_210), 3000 DST_GPR(0), 3001 DST_REL(ABSOLUTE), 3002 DST_ELEM(ELEM_Z), 3003 CLAMP(0)); 3004 /* 27 */ 3005 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3006 SRC0_REL(ABSOLUTE), 3007 SRC0_ELEM(ELEM_X), 3008 SRC0_NEG(0), 3009 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 3010 SRC1_REL(ABSOLUTE), 3011 SRC1_ELEM(ELEM_X), 3012 SRC1_NEG(0), 3013 INDEX_MODE(SQ_INDEX_AR_X), 3014 PRED_SEL(SQ_PRED_SEL_OFF), 3015 LAST(1)); 3016 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3017 SRC1_ABS(0), 3018 UPDATE_EXECUTE_MASK(0), 3019 UPDATE_PRED(0), 3020 WRITE_MASK(0), 3021 OMOD(SQ_ALU_OMOD_OFF), 3022 ALU_INST(SQ_OP2_INST_INTERP_XY), 3023 BANK_SWIZZLE(SQ_ALU_VEC_210), 3024 DST_GPR(0), 3025 DST_REL(ABSOLUTE), 3026 DST_ELEM(ELEM_W), 3027 CLAMP(0)); 3028 3029 /* 28/29 - src - mask */ 3030 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3031 INST_MOD(0), 3032 FETCH_WHOLE_QUAD(0), 3033 RESOURCE_ID(0), 3034 SRC_GPR(1), 3035 SRC_REL(ABSOLUTE), 3036 ALT_CONST(0), 3037 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3038 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3039 shader[i++] = TEX_DWORD1(DST_GPR(1), 3040 DST_REL(ABSOLUTE), 3041 DST_SEL_X(SQ_SEL_X), 3042 DST_SEL_Y(SQ_SEL_Y), 3043 DST_SEL_Z(SQ_SEL_Z), 3044 
DST_SEL_W(SQ_SEL_W), 3045 LOD_BIAS(0), 3046 COORD_TYPE_X(TEX_NORMALIZED), 3047 COORD_TYPE_Y(TEX_NORMALIZED), 3048 COORD_TYPE_Z(TEX_NORMALIZED), 3049 COORD_TYPE_W(TEX_NORMALIZED)); 3050 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3051 OFFSET_Y(0), 3052 OFFSET_Z(0), 3053 SAMPLER_ID(0), 3054 SRC_SEL_X(SQ_SEL_X), 3055 SRC_SEL_Y(SQ_SEL_Y), 3056 SRC_SEL_Z(SQ_SEL_0), 3057 SRC_SEL_W(SQ_SEL_1)); 3058 shader[i++] = TEX_DWORD_PAD; 3059 /* 30/31 - mask */ 3060 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3061 INST_MOD(0), 3062 FETCH_WHOLE_QUAD(0), 3063 RESOURCE_ID(1), 3064 SRC_GPR(0), 3065 SRC_REL(ABSOLUTE), 3066 ALT_CONST(0), 3067 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3068 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3069 shader[i++] = TEX_DWORD1(DST_GPR(0), 3070 DST_REL(ABSOLUTE), 3071 DST_SEL_X(SQ_SEL_X), 3072 DST_SEL_Y(SQ_SEL_Y), 3073 DST_SEL_Z(SQ_SEL_Z), 3074 DST_SEL_W(SQ_SEL_W), 3075 LOD_BIAS(0), 3076 COORD_TYPE_X(TEX_NORMALIZED), 3077 COORD_TYPE_Y(TEX_NORMALIZED), 3078 COORD_TYPE_Z(TEX_NORMALIZED), 3079 COORD_TYPE_W(TEX_NORMALIZED)); 3080 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3081 OFFSET_Y(0), 3082 OFFSET_Z(0), 3083 SAMPLER_ID(1), 3084 SRC_SEL_X(SQ_SEL_X), 3085 SRC_SEL_Y(SQ_SEL_Y), 3086 SRC_SEL_Z(SQ_SEL_0), 3087 SRC_SEL_W(SQ_SEL_1)); 3088 shader[i++] = TEX_DWORD_PAD; 3089 3090 /* 32/33 - src - non-mask */ 3091 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3092 INST_MOD(0), 3093 FETCH_WHOLE_QUAD(0), 3094 RESOURCE_ID(0), 3095 SRC_GPR(0), 3096 SRC_REL(ABSOLUTE), 3097 ALT_CONST(0), 3098 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3099 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3100 shader[i++] = TEX_DWORD1(DST_GPR(0), 3101 DST_REL(ABSOLUTE), 3102 DST_SEL_X(SQ_SEL_X), 3103 DST_SEL_Y(SQ_SEL_Y), 3104 DST_SEL_Z(SQ_SEL_Z), 3105 DST_SEL_W(SQ_SEL_W), 3106 LOD_BIAS(0), 3107 COORD_TYPE_X(TEX_NORMALIZED), 3108 COORD_TYPE_Y(TEX_NORMALIZED), 3109 COORD_TYPE_Z(TEX_NORMALIZED), 3110 COORD_TYPE_W(TEX_NORMALIZED)); 3111 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3112 OFFSET_Y(0), 3113 
OFFSET_Z(0), 3114 SAMPLER_ID(0), 3115 SRC_SEL_X(SQ_SEL_X), 3116 SRC_SEL_Y(SQ_SEL_Y), 3117 SRC_SEL_Z(SQ_SEL_0), 3118 SRC_SEL_W(SQ_SEL_1)); 3119 shader[i++] = TEX_DWORD_PAD; 3120 3121 return i; 3122} 3123 3124#endif 3125