r600_shader.c revision 921a55d8
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "radeon.h" 34#include "r600_shader.h" 35#include "r600_reg.h" 36 37/* solid vs --------------------------------------- */ 38int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) 39{ 40 int i = 0; 41 42 /* 0 */ 43 shader[i++] = CF_DWORD0(ADDR(4)); 44 shader[i++] = CF_DWORD1(POP_COUNT(0), 45 CF_CONST(0), 46 COND(SQ_CF_COND_ACTIVE), 47 I_COUNT(1), 48 CALL_COUNT(0), 49 END_OF_PROGRAM(0), 50 VALID_PIXEL_MODE(0), 51 CF_INST(SQ_CF_INST_VTX), 52 WHOLE_QUAD_MODE(0), 53 BARRIER(1)); 54 /* 1 */ 55 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 56 TYPE(SQ_EXPORT_POS), 57 RW_GPR(1), 58 RW_REL(ABSOLUTE), 59 INDEX_GPR(0), 60 ELEM_SIZE(0)); 61 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 62 SRC_SEL_Y(SQ_SEL_Y), 63 SRC_SEL_Z(SQ_SEL_Z), 64 SRC_SEL_W(SQ_SEL_W), 65 R6xx_ELEM_LOOP(0), 66 BURST_COUNT(1), 67 END_OF_PROGRAM(0), 68 VALID_PIXEL_MODE(0), 69 CF_INST(SQ_CF_INST_EXPORT_DONE), 70 WHOLE_QUAD_MODE(0), 71 BARRIER(1)); 72 /* 2 - always export a param whether it's used or not */ 73 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 74 TYPE(SQ_EXPORT_PARAM), 75 RW_GPR(0), 76 RW_REL(ABSOLUTE), 77 INDEX_GPR(0), 78 ELEM_SIZE(0)); 79 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 80 SRC_SEL_Y(SQ_SEL_Y), 81 SRC_SEL_Z(SQ_SEL_Z), 82 SRC_SEL_W(SQ_SEL_W), 83 R6xx_ELEM_LOOP(0), 84 BURST_COUNT(0), 85 END_OF_PROGRAM(1), 86 VALID_PIXEL_MODE(0), 87 CF_INST(SQ_CF_INST_EXPORT_DONE), 88 WHOLE_QUAD_MODE(0), 89 BARRIER(0)); 90 /* 3 - padding */ 91 shader[i++] = 0x00000000; 92 shader[i++] = 0x00000000; 93 /* 4/5 */ 94 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 95 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 96 FETCH_WHOLE_QUAD(0), 97 BUFFER_ID(0), 98 SRC_GPR(0), 99 SRC_REL(ABSOLUTE), 100 SRC_SEL_X(SQ_SEL_X), 101 MEGA_FETCH_COUNT(8)); 102 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 103 
DST_REL(0), 104 DST_SEL_X(SQ_SEL_X), 105 DST_SEL_Y(SQ_SEL_Y), 106 DST_SEL_Z(SQ_SEL_0), 107 DST_SEL_W(SQ_SEL_1), 108 USE_CONST_FIELDS(0), 109 DATA_FORMAT(FMT_32_32_FLOAT), 110 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 111 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 112 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 113 shader[i++] = VTX_DWORD2(OFFSET(0), 114 ENDIAN_SWAP(ENDIAN_NONE), 115 CONST_BUF_NO_STRIDE(0), 116 MEGA_FETCH(1)); 117 shader[i++] = VTX_DWORD_PAD; 118 119 return i; 120} 121 122/* solid ps --------------------------------------- */ 123int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) 124{ 125 int i = 0; 126 127 /* 0 */ 128 shader[i++] = CF_ALU_DWORD0(ADDR(2), 129 KCACHE_BANK0(0), 130 KCACHE_BANK1(0), 131 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 132 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 133 KCACHE_ADDR0(0), 134 KCACHE_ADDR1(0), 135 I_COUNT(4), 136 USES_WATERFALL(0), 137 CF_INST(SQ_CF_INST_ALU), 138 WHOLE_QUAD_MODE(0), 139 BARRIER(1)); 140 /* 1 */ 141 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 142 TYPE(SQ_EXPORT_PIXEL), 143 RW_GPR(0), 144 RW_REL(ABSOLUTE), 145 INDEX_GPR(0), 146 ELEM_SIZE(1)); 147 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 148 SRC_SEL_Y(SQ_SEL_Y), 149 SRC_SEL_Z(SQ_SEL_Z), 150 SRC_SEL_W(SQ_SEL_W), 151 R6xx_ELEM_LOOP(0), 152 BURST_COUNT(1), 153 END_OF_PROGRAM(1), 154 VALID_PIXEL_MODE(0), 155 CF_INST(SQ_CF_INST_EXPORT_DONE), 156 WHOLE_QUAD_MODE(0), 157 BARRIER(1)); 158 159 /* 2 */ 160 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 161 SRC0_REL(ABSOLUTE), 162 SRC0_ELEM(ELEM_X), 163 SRC0_NEG(0), 164 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 165 SRC1_REL(ABSOLUTE), 166 SRC1_ELEM(ELEM_X), 167 SRC1_NEG(0), 168 INDEX_MODE(SQ_INDEX_AR_X), 169 PRED_SEL(SQ_PRED_SEL_OFF), 170 LAST(0)); 171 shader[i++] = ALU_DWORD1_OP2(ChipSet, 172 SRC0_ABS(0), 173 SRC1_ABS(0), 174 UPDATE_EXECUTE_MASK(0), 175 UPDATE_PRED(0), 176 WRITE_MASK(1), 177 FOG_MERGE(0), 178 OMOD(SQ_ALU_OMOD_OFF), 179 
ALU_INST(SQ_OP2_INST_MOV), 180 BANK_SWIZZLE(SQ_ALU_VEC_012), 181 DST_GPR(0), 182 DST_REL(ABSOLUTE), 183 DST_ELEM(ELEM_X), 184 CLAMP(1)); 185 /* 3 */ 186 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 187 SRC0_REL(ABSOLUTE), 188 SRC0_ELEM(ELEM_Y), 189 SRC0_NEG(0), 190 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 191 SRC1_REL(ABSOLUTE), 192 SRC1_ELEM(ELEM_Y), 193 SRC1_NEG(0), 194 INDEX_MODE(SQ_INDEX_AR_X), 195 PRED_SEL(SQ_PRED_SEL_OFF), 196 LAST(0)); 197 shader[i++] = ALU_DWORD1_OP2(ChipSet, 198 SRC0_ABS(0), 199 SRC1_ABS(0), 200 UPDATE_EXECUTE_MASK(0), 201 UPDATE_PRED(0), 202 WRITE_MASK(1), 203 FOG_MERGE(0), 204 OMOD(SQ_ALU_OMOD_OFF), 205 ALU_INST(SQ_OP2_INST_MOV), 206 BANK_SWIZZLE(SQ_ALU_VEC_012), 207 DST_GPR(0), 208 DST_REL(ABSOLUTE), 209 DST_ELEM(ELEM_Y), 210 CLAMP(1)); 211 /* 4 */ 212 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 213 SRC0_REL(ABSOLUTE), 214 SRC0_ELEM(ELEM_Z), 215 SRC0_NEG(0), 216 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 217 SRC1_REL(ABSOLUTE), 218 SRC1_ELEM(ELEM_Z), 219 SRC1_NEG(0), 220 INDEX_MODE(SQ_INDEX_AR_X), 221 PRED_SEL(SQ_PRED_SEL_OFF), 222 LAST(0)); 223 shader[i++] = ALU_DWORD1_OP2(ChipSet, 224 SRC0_ABS(0), 225 SRC1_ABS(0), 226 UPDATE_EXECUTE_MASK(0), 227 UPDATE_PRED(0), 228 WRITE_MASK(1), 229 FOG_MERGE(0), 230 OMOD(SQ_ALU_OMOD_OFF), 231 ALU_INST(SQ_OP2_INST_MOV), 232 BANK_SWIZZLE(SQ_ALU_VEC_012), 233 DST_GPR(0), 234 DST_REL(ABSOLUTE), 235 DST_ELEM(ELEM_Z), 236 CLAMP(1)); 237 /* 5 */ 238 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 239 SRC0_REL(ABSOLUTE), 240 SRC0_ELEM(ELEM_W), 241 SRC0_NEG(0), 242 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 243 SRC1_REL(ABSOLUTE), 244 SRC1_ELEM(ELEM_W), 245 SRC1_NEG(0), 246 INDEX_MODE(SQ_INDEX_AR_X), 247 PRED_SEL(SQ_PRED_SEL_OFF), 248 LAST(1)); 249 shader[i++] = ALU_DWORD1_OP2(ChipSet, 250 SRC0_ABS(0), 251 SRC1_ABS(0), 252 UPDATE_EXECUTE_MASK(0), 253 UPDATE_PRED(0), 254 WRITE_MASK(1), 255 FOG_MERGE(0), 256 OMOD(SQ_ALU_OMOD_OFF), 257 ALU_INST(SQ_OP2_INST_MOV), 258 BANK_SWIZZLE(SQ_ALU_VEC_012), 259 
DST_GPR(0), 260 DST_REL(ABSOLUTE), 261 DST_ELEM(ELEM_W), 262 CLAMP(1)); 263 264 return i; 265} 266 267/* copy vs --------------------------------------- */ 268int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) 269{ 270 int i = 0; 271 272 /* 0 */ 273 shader[i++] = CF_DWORD0(ADDR(4)); 274 shader[i++] = CF_DWORD1(POP_COUNT(0), 275 CF_CONST(0), 276 COND(SQ_CF_COND_ACTIVE), 277 I_COUNT(2), 278 CALL_COUNT(0), 279 END_OF_PROGRAM(0), 280 VALID_PIXEL_MODE(0), 281 CF_INST(SQ_CF_INST_VTX), 282 WHOLE_QUAD_MODE(0), 283 BARRIER(1)); 284 /* 1 */ 285 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 286 TYPE(SQ_EXPORT_POS), 287 RW_GPR(1), 288 RW_REL(ABSOLUTE), 289 INDEX_GPR(0), 290 ELEM_SIZE(0)); 291 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 292 SRC_SEL_Y(SQ_SEL_Y), 293 SRC_SEL_Z(SQ_SEL_Z), 294 SRC_SEL_W(SQ_SEL_W), 295 R6xx_ELEM_LOOP(0), 296 BURST_COUNT(0), 297 END_OF_PROGRAM(0), 298 VALID_PIXEL_MODE(0), 299 CF_INST(SQ_CF_INST_EXPORT_DONE), 300 WHOLE_QUAD_MODE(0), 301 BARRIER(1)); 302 /* 2 */ 303 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 304 TYPE(SQ_EXPORT_PARAM), 305 RW_GPR(0), 306 RW_REL(ABSOLUTE), 307 INDEX_GPR(0), 308 ELEM_SIZE(0)); 309 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 310 SRC_SEL_Y(SQ_SEL_Y), 311 SRC_SEL_Z(SQ_SEL_Z), 312 SRC_SEL_W(SQ_SEL_W), 313 R6xx_ELEM_LOOP(0), 314 BURST_COUNT(0), 315 END_OF_PROGRAM(1), 316 VALID_PIXEL_MODE(0), 317 CF_INST(SQ_CF_INST_EXPORT_DONE), 318 WHOLE_QUAD_MODE(0), 319 BARRIER(0)); 320 /* 3 */ 321 shader[i++] = 0x00000000; 322 shader[i++] = 0x00000000; 323 /* 4/5 */ 324 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 325 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 326 FETCH_WHOLE_QUAD(0), 327 BUFFER_ID(0), 328 SRC_GPR(0), 329 SRC_REL(ABSOLUTE), 330 SRC_SEL_X(SQ_SEL_X), 331 MEGA_FETCH_COUNT(16)); 332 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 333 DST_REL(0), 334 DST_SEL_X(SQ_SEL_X), 335 DST_SEL_Y(SQ_SEL_Y), 336 DST_SEL_Z(SQ_SEL_0), 337 DST_SEL_W(SQ_SEL_1), 338 
USE_CONST_FIELDS(0), 339 DATA_FORMAT(FMT_32_32_FLOAT), 340 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 341 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 342 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 343 shader[i++] = VTX_DWORD2(OFFSET(0), 344 ENDIAN_SWAP(ENDIAN_NONE), 345 CONST_BUF_NO_STRIDE(0), 346 MEGA_FETCH(1)); 347 shader[i++] = VTX_DWORD_PAD; 348 /* 6/7 */ 349 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 350 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 351 FETCH_WHOLE_QUAD(0), 352 BUFFER_ID(0), 353 SRC_GPR(0), 354 SRC_REL(ABSOLUTE), 355 SRC_SEL_X(SQ_SEL_X), 356 MEGA_FETCH_COUNT(8)); 357 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 358 DST_REL(0), 359 DST_SEL_X(SQ_SEL_X), 360 DST_SEL_Y(SQ_SEL_Y), 361 DST_SEL_Z(SQ_SEL_0), 362 DST_SEL_W(SQ_SEL_1), 363 USE_CONST_FIELDS(0), 364 DATA_FORMAT(FMT_32_32_FLOAT), 365 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 366 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 367 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 368 shader[i++] = VTX_DWORD2(OFFSET(8), 369 ENDIAN_SWAP(ENDIAN_NONE), 370 CONST_BUF_NO_STRIDE(0), 371 MEGA_FETCH(0)); 372 shader[i++] = VTX_DWORD_PAD; 373 374 return i; 375} 376 377/* copy ps --------------------------------------- */ 378int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) 379{ 380 int i=0; 381 382 /* CF INST 0 */ 383 shader[i++] = CF_DWORD0(ADDR(2)); 384 shader[i++] = CF_DWORD1(POP_COUNT(0), 385 CF_CONST(0), 386 COND(SQ_CF_COND_ACTIVE), 387 I_COUNT(1), 388 CALL_COUNT(0), 389 END_OF_PROGRAM(0), 390 VALID_PIXEL_MODE(0), 391 CF_INST(SQ_CF_INST_TEX), 392 WHOLE_QUAD_MODE(0), 393 BARRIER(1)); 394 /* CF INST 1 */ 395 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 396 TYPE(SQ_EXPORT_PIXEL), 397 RW_GPR(0), 398 RW_REL(ABSOLUTE), 399 INDEX_GPR(0), 400 ELEM_SIZE(1)); 401 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 402 SRC_SEL_Y(SQ_SEL_Y), 403 SRC_SEL_Z(SQ_SEL_Z), 404 SRC_SEL_W(SQ_SEL_W), 405 R6xx_ELEM_LOOP(0), 406 BURST_COUNT(1), 407 END_OF_PROGRAM(1), 408 VALID_PIXEL_MODE(0), 409 
CF_INST(SQ_CF_INST_EXPORT_DONE), 410 WHOLE_QUAD_MODE(0), 411 BARRIER(1)); 412 /* TEX INST 0 */ 413 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 414 BC_FRAC_MODE(0), 415 FETCH_WHOLE_QUAD(0), 416 RESOURCE_ID(0), 417 SRC_GPR(0), 418 SRC_REL(ABSOLUTE), 419 R7xx_ALT_CONST(0)); 420 shader[i++] = TEX_DWORD1(DST_GPR(0), 421 DST_REL(ABSOLUTE), 422 DST_SEL_X(SQ_SEL_X), /* R */ 423 DST_SEL_Y(SQ_SEL_Y), /* G */ 424 DST_SEL_Z(SQ_SEL_Z), /* B */ 425 DST_SEL_W(SQ_SEL_W), /* A */ 426 LOD_BIAS(0), 427 COORD_TYPE_X(TEX_UNNORMALIZED), 428 COORD_TYPE_Y(TEX_UNNORMALIZED), 429 COORD_TYPE_Z(TEX_UNNORMALIZED), 430 COORD_TYPE_W(TEX_UNNORMALIZED)); 431 shader[i++] = TEX_DWORD2(OFFSET_X(0), 432 OFFSET_Y(0), 433 OFFSET_Z(0), 434 SAMPLER_ID(0), 435 SRC_SEL_X(SQ_SEL_X), 436 SRC_SEL_Y(SQ_SEL_Y), 437 SRC_SEL_Z(SQ_SEL_0), 438 SRC_SEL_W(SQ_SEL_1)); 439 shader[i++] = TEX_DWORD_PAD; 440 441 return i; 442} 443 444/* 445 * ; xv vertex shader 446 * 00 VTX: ADDR(4) CNT(2) 447 * 0 VFETCH R1.xy01, R0.x, fc0 MEGA(16) FORMAT(32_32_FLOAT) 448 * FORMAT_COMP(SIGNED) 449 * 1 VFETCH R0.xy01, R0.x, fc0 MINI(8) OFFSET(8) FORMAT(32_32_FLOAT) 450 * FORMAT_COMP(SIGNED) 451 * 01 EXP_DONE: POS0, R1 452 * 02 EXP_DONE: PARAM0, R0 NO_BARRIER 453 * END_OF_PROGRAM 454 */ 455int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) 456{ 457 int i = 0; 458 459 /* 0 */ 460 shader[i++] = CF_DWORD0(ADDR(6)); 461 shader[i++] = CF_DWORD1(POP_COUNT(0), 462 CF_CONST(0), 463 COND(SQ_CF_COND_ACTIVE), 464 I_COUNT(2), 465 CALL_COUNT(0), 466 END_OF_PROGRAM(0), 467 VALID_PIXEL_MODE(0), 468 CF_INST(SQ_CF_INST_VTX), 469 WHOLE_QUAD_MODE(0), 470 BARRIER(1)); 471 472 /* 1 - ALU */ 473 shader[i++] = CF_ALU_DWORD0(ADDR(4), 474 KCACHE_BANK0(0), 475 KCACHE_BANK1(0), 476 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 477 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 478 KCACHE_ADDR0(0), 479 KCACHE_ADDR1(0), 480 I_COUNT(2), 481 USES_WATERFALL(0), 482 CF_INST(SQ_CF_INST_ALU), 483 WHOLE_QUAD_MODE(0), 484 BARRIER(1)); 485 486 /* 2 */ 487 
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 488 TYPE(SQ_EXPORT_POS), 489 RW_GPR(1), 490 RW_REL(ABSOLUTE), 491 INDEX_GPR(0), 492 ELEM_SIZE(3)); 493 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 494 SRC_SEL_Y(SQ_SEL_Y), 495 SRC_SEL_Z(SQ_SEL_Z), 496 SRC_SEL_W(SQ_SEL_W), 497 R6xx_ELEM_LOOP(0), 498 BURST_COUNT(1), 499 END_OF_PROGRAM(0), 500 VALID_PIXEL_MODE(0), 501 CF_INST(SQ_CF_INST_EXPORT_DONE), 502 WHOLE_QUAD_MODE(0), 503 BARRIER(1)); 504 /* 3 */ 505 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 506 TYPE(SQ_EXPORT_PARAM), 507 RW_GPR(0), 508 RW_REL(ABSOLUTE), 509 INDEX_GPR(0), 510 ELEM_SIZE(3)); 511 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 512 SRC_SEL_Y(SQ_SEL_Y), 513 SRC_SEL_Z(SQ_SEL_Z), 514 SRC_SEL_W(SQ_SEL_W), 515 R6xx_ELEM_LOOP(0), 516 BURST_COUNT(1), 517 END_OF_PROGRAM(1), 518 VALID_PIXEL_MODE(0), 519 CF_INST(SQ_CF_INST_EXPORT_DONE), 520 WHOLE_QUAD_MODE(0), 521 BARRIER(0)); 522 523 524 /* 4 texX / w */ 525 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 526 SRC0_REL(ABSOLUTE), 527 SRC0_ELEM(ELEM_X), 528 SRC0_NEG(0), 529 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 530 SRC1_REL(ABSOLUTE), 531 SRC1_ELEM(ELEM_X), 532 SRC1_NEG(0), 533 INDEX_MODE(SQ_INDEX_AR_X), 534 PRED_SEL(SQ_PRED_SEL_OFF), 535 LAST(0)); 536 shader[i++] = ALU_DWORD1_OP2(ChipSet, 537 SRC0_ABS(0), 538 SRC1_ABS(0), 539 UPDATE_EXECUTE_MASK(0), 540 UPDATE_PRED(0), 541 WRITE_MASK(1), 542 FOG_MERGE(0), 543 OMOD(SQ_ALU_OMOD_OFF), 544 ALU_INST(SQ_OP2_INST_MUL), 545 BANK_SWIZZLE(SQ_ALU_VEC_012), 546 DST_GPR(0), 547 DST_REL(ABSOLUTE), 548 DST_ELEM(ELEM_X), 549 CLAMP(0)); 550 551 /* 5 texY / h */ 552 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 553 SRC0_REL(ABSOLUTE), 554 SRC0_ELEM(ELEM_Y), 555 SRC0_NEG(0), 556 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 557 SRC1_REL(ABSOLUTE), 558 SRC1_ELEM(ELEM_Y), 559 SRC1_NEG(0), 560 INDEX_MODE(SQ_INDEX_AR_X), 561 PRED_SEL(SQ_PRED_SEL_OFF), 562 LAST(1)); 563 shader[i++] = ALU_DWORD1_OP2(ChipSet, 564 SRC0_ABS(0), 
565 SRC1_ABS(0), 566 UPDATE_EXECUTE_MASK(0), 567 UPDATE_PRED(0), 568 WRITE_MASK(1), 569 FOG_MERGE(0), 570 OMOD(SQ_ALU_OMOD_OFF), 571 ALU_INST(SQ_OP2_INST_MUL), 572 BANK_SWIZZLE(SQ_ALU_VEC_012), 573 DST_GPR(0), 574 DST_REL(ABSOLUTE), 575 DST_ELEM(ELEM_Y), 576 CLAMP(0)); 577 578 /* 6/7 */ 579 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 580 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 581 FETCH_WHOLE_QUAD(0), 582 BUFFER_ID(0), 583 SRC_GPR(0), 584 SRC_REL(ABSOLUTE), 585 SRC_SEL_X(SQ_SEL_X), 586 MEGA_FETCH_COUNT(16)); 587 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 588 DST_REL(ABSOLUTE), 589 DST_SEL_X(SQ_SEL_X), 590 DST_SEL_Y(SQ_SEL_Y), 591 DST_SEL_Z(SQ_SEL_0), 592 DST_SEL_W(SQ_SEL_1), 593 USE_CONST_FIELDS(0), 594 DATA_FORMAT(FMT_32_32_FLOAT), 595 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 596 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 597 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 598 shader[i++] = VTX_DWORD2(OFFSET(0), 599 ENDIAN_SWAP(ENDIAN_NONE), 600 CONST_BUF_NO_STRIDE(0), 601 MEGA_FETCH(1)); 602 shader[i++] = VTX_DWORD_PAD; 603 /* 8/9 */ 604 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 605 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 606 FETCH_WHOLE_QUAD(0), 607 BUFFER_ID(0), 608 SRC_GPR(0), 609 SRC_REL(ABSOLUTE), 610 SRC_SEL_X(SQ_SEL_X), 611 MEGA_FETCH_COUNT(8)); 612 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 613 DST_REL(ABSOLUTE), 614 DST_SEL_X(SQ_SEL_X), 615 DST_SEL_Y(SQ_SEL_Y), 616 DST_SEL_Z(SQ_SEL_0), 617 DST_SEL_W(SQ_SEL_1), 618 USE_CONST_FIELDS(0), 619 DATA_FORMAT(FMT_32_32_FLOAT), 620 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 621 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 622 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 623 shader[i++] = VTX_DWORD2(OFFSET(8), 624 ENDIAN_SWAP(ENDIAN_NONE), 625 CONST_BUF_NO_STRIDE(0), 626 MEGA_FETCH(0)); 627 shader[i++] = VTX_DWORD_PAD; 628 629 return i; 630} 631 632int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) 633{ 634 int i = 0; 635 636 /* 0 */ 637 shader[i++] = CF_DWORD0(ADDR(16)); 638 shader[i++] = 
CF_DWORD1(POP_COUNT(0), 639 CF_CONST(0), 640 COND(SQ_CF_COND_BOOL), 641 I_COUNT(0), 642 CALL_COUNT(0), 643 END_OF_PROGRAM(0), 644 VALID_PIXEL_MODE(0), 645 CF_INST(SQ_CF_INST_CALL), 646 WHOLE_QUAD_MODE(0), 647 BARRIER(0)); 648 /* 1 */ 649 shader[i++] = CF_DWORD0(ADDR(24)); 650 shader[i++] = CF_DWORD1(POP_COUNT(0), 651 CF_CONST(0), 652 COND(SQ_CF_COND_NOT_BOOL), 653 I_COUNT(0), 654 CALL_COUNT(0), 655 END_OF_PROGRAM(0), 656 VALID_PIXEL_MODE(0), 657 CF_INST(SQ_CF_INST_CALL), 658 WHOLE_QUAD_MODE(0), 659 BARRIER(0)); 660 /* 2 */ 661 shader[i++] = CF_ALU_DWORD0(ADDR(4), 662 KCACHE_BANK0(0), 663 KCACHE_BANK1(0), 664 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 665 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 666 KCACHE_ADDR0(0), 667 KCACHE_ADDR1(0), 668 I_COUNT(12), 669 USES_WATERFALL(0), 670 CF_INST(SQ_CF_INST_ALU), 671 WHOLE_QUAD_MODE(0), 672 BARRIER(1)); 673 /* 3 */ 674 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 675 TYPE(SQ_EXPORT_PIXEL), 676 RW_GPR(2), 677 RW_REL(ABSOLUTE), 678 INDEX_GPR(0), 679 ELEM_SIZE(3)); 680 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 681 SRC_SEL_Y(SQ_SEL_Y), 682 SRC_SEL_Z(SQ_SEL_Z), 683 SRC_SEL_W(SQ_SEL_W), 684 R6xx_ELEM_LOOP(0), 685 BURST_COUNT(1), 686 END_OF_PROGRAM(1), 687 VALID_PIXEL_MODE(0), 688 CF_INST(SQ_CF_INST_EXPORT_DONE), 689 WHOLE_QUAD_MODE(0), 690 BARRIER(1)); 691 /* 4,5,6,7 */ 692 /* r2.x = MAD(c0.w, r1.x, c0.x) */ 693 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 694 SRC0_REL(ABSOLUTE), 695 SRC0_ELEM(ELEM_W), 696 SRC0_NEG(0), 697 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 698 SRC1_REL(ABSOLUTE), 699 SRC1_ELEM(ELEM_X), 700 SRC1_NEG(0), 701 INDEX_MODE(SQ_INDEX_LOOP), 702 PRED_SEL(SQ_PRED_SEL_OFF), 703 LAST(0)); 704 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), 705 SRC2_REL(ABSOLUTE), 706 SRC2_ELEM(ELEM_X), 707 SRC2_NEG(0), 708 ALU_INST(SQ_OP3_INST_MULADD), 709 BANK_SWIZZLE(SQ_ALU_VEC_012), 710 DST_GPR(2), 711 DST_REL(ABSOLUTE), 712 DST_ELEM(ELEM_X), 713 CLAMP(0)); 714 
/* r2.y = MAD(c0.w, r1.x, c0.y) */ 715 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 716 SRC0_REL(ABSOLUTE), 717 SRC0_ELEM(ELEM_W), 718 SRC0_NEG(0), 719 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 720 SRC1_REL(ABSOLUTE), 721 SRC1_ELEM(ELEM_X), 722 SRC1_NEG(0), 723 INDEX_MODE(SQ_INDEX_LOOP), 724 PRED_SEL(SQ_PRED_SEL_OFF), 725 LAST(0)); 726 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), 727 SRC2_REL(ABSOLUTE), 728 SRC2_ELEM(ELEM_Y), 729 SRC2_NEG(0), 730 ALU_INST(SQ_OP3_INST_MULADD), 731 BANK_SWIZZLE(SQ_ALU_VEC_012), 732 DST_GPR(2), 733 DST_REL(ABSOLUTE), 734 DST_ELEM(ELEM_Y), 735 CLAMP(0)); 736 /* r2.z = MAD(c0.w, r1.x, c0.z) */ 737 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 738 SRC0_REL(ABSOLUTE), 739 SRC0_ELEM(ELEM_W), 740 SRC0_NEG(0), 741 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 742 SRC1_REL(ABSOLUTE), 743 SRC1_ELEM(ELEM_X), 744 SRC1_NEG(0), 745 INDEX_MODE(SQ_INDEX_LOOP), 746 PRED_SEL(SQ_PRED_SEL_OFF), 747 LAST(0)); 748 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), 749 SRC2_REL(ABSOLUTE), 750 SRC2_ELEM(ELEM_Z), 751 SRC2_NEG(0), 752 ALU_INST(SQ_OP3_INST_MULADD), 753 BANK_SWIZZLE(SQ_ALU_VEC_012), 754 DST_GPR(2), 755 DST_REL(ABSOLUTE), 756 DST_ELEM(ELEM_Z), 757 CLAMP(0)); 758 /* r2.w = MAD(0, 0, 1) */ 759 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 760 SRC0_REL(ABSOLUTE), 761 SRC0_ELEM(ELEM_X), 762 SRC0_NEG(0), 763 SRC1_SEL(SQ_ALU_SRC_0), 764 SRC1_REL(ABSOLUTE), 765 SRC1_ELEM(ELEM_X), 766 SRC1_NEG(0), 767 INDEX_MODE(SQ_INDEX_LOOP), 768 PRED_SEL(SQ_PRED_SEL_OFF), 769 LAST(1)); 770 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 771 SRC2_REL(ABSOLUTE), 772 SRC2_ELEM(ELEM_X), 773 SRC2_NEG(0), 774 ALU_INST(SQ_OP3_INST_MULADD), 775 BANK_SWIZZLE(SQ_ALU_VEC_012), 776 DST_GPR(2), 777 DST_REL(ABSOLUTE), 778 DST_ELEM(ELEM_W), 779 CLAMP(0)); 780 781 /* 8,9,10,11 */ 782 /* r2.x = MAD(c1.x, r1.y, pv.x) */ 783 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 784 SRC0_REL(ABSOLUTE), 785 SRC0_ELEM(ELEM_X), 786 SRC0_NEG(0), 
787 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 788 SRC1_REL(ABSOLUTE), 789 SRC1_ELEM(ELEM_Y), 790 SRC1_NEG(0), 791 INDEX_MODE(SQ_INDEX_LOOP), 792 PRED_SEL(SQ_PRED_SEL_OFF), 793 LAST(0)); 794 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 795 SRC2_REL(ABSOLUTE), 796 SRC2_ELEM(ELEM_X), 797 SRC2_NEG(0), 798 ALU_INST(SQ_OP3_INST_MULADD), 799 BANK_SWIZZLE(SQ_ALU_VEC_012), 800 DST_GPR(2), 801 DST_REL(ABSOLUTE), 802 DST_ELEM(ELEM_X), 803 CLAMP(0)); 804 /* r2.y = MAD(c1.y, r1.y, pv.y) */ 805 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 806 SRC0_REL(ABSOLUTE), 807 SRC0_ELEM(ELEM_Y), 808 SRC0_NEG(0), 809 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 810 SRC1_REL(ABSOLUTE), 811 SRC1_ELEM(ELEM_Y), 812 SRC1_NEG(0), 813 INDEX_MODE(SQ_INDEX_LOOP), 814 PRED_SEL(SQ_PRED_SEL_OFF), 815 LAST(0)); 816 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 817 SRC2_REL(ABSOLUTE), 818 SRC2_ELEM(ELEM_Y), 819 SRC2_NEG(0), 820 ALU_INST(SQ_OP3_INST_MULADD), 821 BANK_SWIZZLE(SQ_ALU_VEC_012), 822 DST_GPR(2), 823 DST_REL(ABSOLUTE), 824 DST_ELEM(ELEM_Y), 825 CLAMP(0)); 826 /* r2.z = MAD(c1.z, r1.y, pv.z) */ 827 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 828 SRC0_REL(ABSOLUTE), 829 SRC0_ELEM(ELEM_Z), 830 SRC0_NEG(0), 831 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 832 SRC1_REL(ABSOLUTE), 833 SRC1_ELEM(ELEM_Y), 834 SRC1_NEG(0), 835 INDEX_MODE(SQ_INDEX_LOOP), 836 PRED_SEL(SQ_PRED_SEL_OFF), 837 LAST(0)); 838 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 839 SRC2_REL(ABSOLUTE), 840 SRC2_ELEM(ELEM_Z), 841 SRC2_NEG(0), 842 ALU_INST(SQ_OP3_INST_MULADD), 843 BANK_SWIZZLE(SQ_ALU_VEC_012), 844 DST_GPR(2), 845 DST_REL(ABSOLUTE), 846 DST_ELEM(ELEM_Z), 847 CLAMP(0)); 848 /* r2.w = MAD(0, 0, 1) */ 849 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 850 SRC0_REL(ABSOLUTE), 851 SRC0_ELEM(ELEM_X), 852 SRC0_NEG(0), 853 SRC1_SEL(SQ_ALU_SRC_0), 854 SRC1_REL(ABSOLUTE), 855 SRC1_ELEM(ELEM_X), 856 SRC1_NEG(0), 857 INDEX_MODE(SQ_INDEX_LOOP), 858 PRED_SEL(SQ_PRED_SEL_OFF), 859 LAST(1)); 860 shader[i++] = 
ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 861 SRC2_REL(ABSOLUTE), 862 SRC2_ELEM(ELEM_W), 863 SRC2_NEG(0), 864 ALU_INST(SQ_OP3_INST_MULADD), 865 BANK_SWIZZLE(SQ_ALU_VEC_012), 866 DST_GPR(2), 867 DST_REL(ABSOLUTE), 868 DST_ELEM(ELEM_W), 869 CLAMP(0)); 870 /* 12,13,14,15 */ 871 /* r2.x = MAD(c2.x, r1.z, pv.x) */ 872 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), 873 SRC0_REL(ABSOLUTE), 874 SRC0_ELEM(ELEM_X), 875 SRC0_NEG(0), 876 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 877 SRC1_REL(ABSOLUTE), 878 SRC1_ELEM(ELEM_Z), 879 SRC1_NEG(0), 880 INDEX_MODE(SQ_INDEX_LOOP), 881 PRED_SEL(SQ_PRED_SEL_OFF), 882 LAST(0)); 883 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 884 SRC2_REL(ABSOLUTE), 885 SRC2_ELEM(ELEM_X), 886 SRC2_NEG(0), 887 ALU_INST(SQ_OP3_INST_MULADD), 888 BANK_SWIZZLE(SQ_ALU_VEC_012), 889 DST_GPR(2), 890 DST_REL(ABSOLUTE), 891 DST_ELEM(ELEM_X), 892 CLAMP(1)); 893 /* r2.y = MAD(c2.y, r1.z, pv.y) */ 894 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), 895 SRC0_REL(ABSOLUTE), 896 SRC0_ELEM(ELEM_Y), 897 SRC0_NEG(0), 898 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 899 SRC1_REL(ABSOLUTE), 900 SRC1_ELEM(ELEM_Z), 901 SRC1_NEG(0), 902 INDEX_MODE(SQ_INDEX_LOOP), 903 PRED_SEL(SQ_PRED_SEL_OFF), 904 LAST(0)); 905 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 906 SRC2_REL(ABSOLUTE), 907 SRC2_ELEM(ELEM_Y), 908 SRC2_NEG(0), 909 ALU_INST(SQ_OP3_INST_MULADD), 910 BANK_SWIZZLE(SQ_ALU_VEC_012), 911 DST_GPR(2), 912 DST_REL(ABSOLUTE), 913 DST_ELEM(ELEM_Y), 914 CLAMP(1)); 915 /* r2.z = MAD(c2.z, r1.z, pv.z) */ 916 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), 917 SRC0_REL(ABSOLUTE), 918 SRC0_ELEM(ELEM_Z), 919 SRC0_NEG(0), 920 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 921 SRC1_REL(ABSOLUTE), 922 SRC1_ELEM(ELEM_Z), 923 SRC1_NEG(0), 924 INDEX_MODE(SQ_INDEX_LOOP), 925 PRED_SEL(SQ_PRED_SEL_OFF), 926 LAST(0)); 927 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 928 SRC2_REL(ABSOLUTE), 929 SRC2_ELEM(ELEM_Z), 930 SRC2_NEG(0), 931 ALU_INST(SQ_OP3_INST_MULADD), 932 
BANK_SWIZZLE(SQ_ALU_VEC_012), 933 DST_GPR(2), 934 DST_REL(ABSOLUTE), 935 DST_ELEM(ELEM_Z), 936 CLAMP(1)); 937 /* r2.w = MAD(0, 0, 1) */ 938 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 939 SRC0_REL(ABSOLUTE), 940 SRC0_ELEM(ELEM_X), 941 SRC0_NEG(0), 942 SRC1_SEL(SQ_ALU_SRC_0), 943 SRC1_REL(ABSOLUTE), 944 SRC1_ELEM(ELEM_X), 945 SRC1_NEG(0), 946 INDEX_MODE(SQ_INDEX_LOOP), 947 PRED_SEL(SQ_PRED_SEL_OFF), 948 LAST(1)); 949 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 950 SRC2_REL(ABSOLUTE), 951 SRC2_ELEM(ELEM_X), 952 SRC2_NEG(0), 953 ALU_INST(SQ_OP3_INST_MULADD), 954 BANK_SWIZZLE(SQ_ALU_VEC_012), 955 DST_GPR(2), 956 DST_REL(ABSOLUTE), 957 DST_ELEM(ELEM_W), 958 CLAMP(1)); 959 960 /* 16 */ 961 shader[i++] = CF_DWORD0(ADDR(18)); 962 shader[i++] = CF_DWORD1(POP_COUNT(0), 963 CF_CONST(0), 964 COND(SQ_CF_COND_ACTIVE), 965 I_COUNT(3), 966 CALL_COUNT(0), 967 END_OF_PROGRAM(0), 968 VALID_PIXEL_MODE(0), 969 CF_INST(SQ_CF_INST_TEX), 970 WHOLE_QUAD_MODE(0), 971 BARRIER(1)); 972 /* 17 */ 973 shader[i++] = CF_DWORD0(ADDR(0)); 974 shader[i++] = CF_DWORD1(POP_COUNT(0), 975 CF_CONST(0), 976 COND(SQ_CF_COND_ACTIVE), 977 I_COUNT(0), 978 CALL_COUNT(0), 979 END_OF_PROGRAM(0), 980 VALID_PIXEL_MODE(0), 981 CF_INST(SQ_CF_INST_RETURN), 982 WHOLE_QUAD_MODE(0), 983 BARRIER(1)); 984 /* 18/19 */ 985 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 986 BC_FRAC_MODE(0), 987 FETCH_WHOLE_QUAD(0), 988 RESOURCE_ID(0), 989 SRC_GPR(0), 990 SRC_REL(ABSOLUTE), 991 R7xx_ALT_CONST(0)); 992 shader[i++] = TEX_DWORD1(DST_GPR(1), 993 DST_REL(ABSOLUTE), 994 DST_SEL_X(SQ_SEL_X), 995 DST_SEL_Y(SQ_SEL_MASK), 996 DST_SEL_Z(SQ_SEL_MASK), 997 DST_SEL_W(SQ_SEL_1), 998 LOD_BIAS(0), 999 COORD_TYPE_X(TEX_NORMALIZED), 1000 COORD_TYPE_Y(TEX_NORMALIZED), 1001 COORD_TYPE_Z(TEX_NORMALIZED), 1002 COORD_TYPE_W(TEX_NORMALIZED)); 1003 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1004 OFFSET_Y(0), 1005 OFFSET_Z(0), 1006 SAMPLER_ID(0), 1007 SRC_SEL_X(SQ_SEL_X), 1008 SRC_SEL_Y(SQ_SEL_Y), 1009 SRC_SEL_Z(SQ_SEL_0), 1010 
SRC_SEL_W(SQ_SEL_1)); 1011 shader[i++] = TEX_DWORD_PAD; 1012 /* 20/21 */ 1013 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1014 BC_FRAC_MODE(0), 1015 FETCH_WHOLE_QUAD(0), 1016 RESOURCE_ID(1), 1017 SRC_GPR(0), 1018 SRC_REL(ABSOLUTE), 1019 R7xx_ALT_CONST(0)); 1020 shader[i++] = TEX_DWORD1(DST_GPR(1), 1021 DST_REL(ABSOLUTE), 1022 DST_SEL_X(SQ_SEL_MASK), 1023 DST_SEL_Y(SQ_SEL_MASK), 1024 DST_SEL_Z(SQ_SEL_X), 1025 DST_SEL_W(SQ_SEL_MASK), 1026 LOD_BIAS(0), 1027 COORD_TYPE_X(TEX_NORMALIZED), 1028 COORD_TYPE_Y(TEX_NORMALIZED), 1029 COORD_TYPE_Z(TEX_NORMALIZED), 1030 COORD_TYPE_W(TEX_NORMALIZED)); 1031 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1032 OFFSET_Y(0), 1033 OFFSET_Z(0), 1034 SAMPLER_ID(1), 1035 SRC_SEL_X(SQ_SEL_X), 1036 SRC_SEL_Y(SQ_SEL_Y), 1037 SRC_SEL_Z(SQ_SEL_0), 1038 SRC_SEL_W(SQ_SEL_1)); 1039 shader[i++] = TEX_DWORD_PAD; 1040 /* 22/23 */ 1041 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1042 BC_FRAC_MODE(0), 1043 FETCH_WHOLE_QUAD(0), 1044 RESOURCE_ID(2), 1045 SRC_GPR(0), 1046 SRC_REL(ABSOLUTE), 1047 R7xx_ALT_CONST(0)); 1048 shader[i++] = TEX_DWORD1(DST_GPR(1), 1049 DST_REL(ABSOLUTE), 1050 DST_SEL_X(SQ_SEL_MASK), 1051 DST_SEL_Y(SQ_SEL_X), 1052 DST_SEL_Z(SQ_SEL_MASK), 1053 DST_SEL_W(SQ_SEL_MASK), 1054 LOD_BIAS(0), 1055 COORD_TYPE_X(TEX_NORMALIZED), 1056 COORD_TYPE_Y(TEX_NORMALIZED), 1057 COORD_TYPE_Z(TEX_NORMALIZED), 1058 COORD_TYPE_W(TEX_NORMALIZED)); 1059 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1060 OFFSET_Y(0), 1061 OFFSET_Z(0), 1062 SAMPLER_ID(2), 1063 SRC_SEL_X(SQ_SEL_X), 1064 SRC_SEL_Y(SQ_SEL_Y), 1065 SRC_SEL_Z(SQ_SEL_0), 1066 SRC_SEL_W(SQ_SEL_1)); 1067 shader[i++] = TEX_DWORD_PAD; 1068 /* 24 */ 1069 shader[i++] = CF_DWORD0(ADDR(26)); 1070 shader[i++] = CF_DWORD1(POP_COUNT(0), 1071 CF_CONST(0), 1072 COND(SQ_CF_COND_ACTIVE), 1073 I_COUNT(2), 1074 CALL_COUNT(0), 1075 END_OF_PROGRAM(0), 1076 VALID_PIXEL_MODE(0), 1077 CF_INST(SQ_CF_INST_TEX), 1078 WHOLE_QUAD_MODE(0), 1079 BARRIER(1)); 1080 /* 25 */ 1081 shader[i++] = CF_DWORD0(ADDR(0)); 1082 
shader[i++] = CF_DWORD1(POP_COUNT(0),
                        CF_CONST(0),
                        COND(SQ_CF_COND_ACTIVE),
                        I_COUNT(0),
                        CALL_COUNT(0),
                        END_OF_PROGRAM(0),
                        VALID_PIXEL_MODE(0),
                        CF_INST(SQ_CF_INST_RETURN),
                        WHOLE_QUAD_MODE(0),
                        BARRIER(1));
    /* 26/27 */
    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
                             BC_FRAC_MODE(0),
                             FETCH_WHOLE_QUAD(0),
                             RESOURCE_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             R7xx_ALT_CONST(0));
    shader[i++] = TEX_DWORD1(DST_GPR(1),
                             DST_REL(ABSOLUTE),
                             DST_SEL_X(SQ_SEL_X),
                             DST_SEL_Y(SQ_SEL_MASK),
                             DST_SEL_Z(SQ_SEL_MASK),
                             DST_SEL_W(SQ_SEL_1),
                             LOD_BIAS(0),
                             COORD_TYPE_X(TEX_NORMALIZED),
                             COORD_TYPE_Y(TEX_NORMALIZED),
                             COORD_TYPE_Z(TEX_NORMALIZED),
                             COORD_TYPE_W(TEX_NORMALIZED));
    shader[i++] = TEX_DWORD2(OFFSET_X(0),
                             OFFSET_Y(0),
                             OFFSET_Z(0),
                             SAMPLER_ID(0),
                             SRC_SEL_X(SQ_SEL_X),
                             SRC_SEL_Y(SQ_SEL_Y),
                             SRC_SEL_Z(SQ_SEL_0),
                             SRC_SEL_W(SQ_SEL_1));
    shader[i++] = TEX_DWORD_PAD;
    /* 28/29 */
    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
                             BC_FRAC_MODE(0),
                             FETCH_WHOLE_QUAD(0),
                             RESOURCE_ID(1),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             R7xx_ALT_CONST(0));
    shader[i++] = TEX_DWORD1(DST_GPR(1),
                             DST_REL(ABSOLUTE),
                             DST_SEL_X(SQ_SEL_MASK),
                             DST_SEL_Y(SQ_SEL_X),
                             DST_SEL_Z(SQ_SEL_Y),
                             DST_SEL_W(SQ_SEL_MASK),
                             LOD_BIAS(0),
                             COORD_TYPE_X(TEX_NORMALIZED),
                             COORD_TYPE_Y(TEX_NORMALIZED),
                             COORD_TYPE_Z(TEX_NORMALIZED),
                             COORD_TYPE_W(TEX_NORMALIZED));
    shader[i++] = TEX_DWORD2(OFFSET_X(0),
                             OFFSET_Y(0),
                             OFFSET_Z(0),
                             SAMPLER_ID(1),
                             SRC_SEL_X(SQ_SEL_X),
                             SRC_SEL_Y(SQ_SEL_Y),
                             SRC_SEL_Z(SQ_SEL_0),
                             SRC_SEL_W(SQ_SEL_1));
    shader[i++] = TEX_DWORD_PAD;

    return i;
}

/* comp vs --------------------------------------- */
/*
 * Write the composite vertex shader into shader[] and return the number of
 * 32-bit words stored (108 for this program).
 *
 * The numbers in the comments below are 64-bit slot indices, i.e. two words
 * of shader[] per slot; the ADDR() operands of the CF instructions use the
 * same numbering:
 *    0-13   control flow (dispatch, mask sub, non-mask sub)
 *   14-33   ALU clause of the mask sub
 *   34-43   ALU clause of the non-mask sub
 *   44-49   vertex fetches of the mask sub (two slots each)
 *   50-53   vertex fetches of the non-mask sub (two slots each)
 * Adding or removing any store therefore requires re-checking every ADDR()
 * and I_COUNT() that follows it.
 *
 * ChipSet is only forwarded to ALU_DWORD1_OP2().  No bounds are checked:
 * the caller must provide room for every word written.
 */
int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
{
    int i = 0;

    /* 0 - CALL slot 3 (mask sub) under COND_BOOL on CF_CONST(0) */
    shader[i++] = CF_DWORD0(ADDR(3));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_BOOL),
                            I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_CALL),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(0));
    /* 1 - CALL slot 9 (non-mask sub) under COND_NOT_BOOL on CF_CONST(0) */
    shader[i++] = CF_DWORD0(ADDR(9));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_NOT_BOOL),
                            I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_CALL),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(0));
    /* 2 - NOP that carries END_OF_PROGRAM(1) */
    shader[i++] = CF_DWORD0(ADDR(0));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(1),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_NOP),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));
    /* 3 - mask sub: vertex fetch clause, 3 fetches starting at slot 44 */
    shader[i++] = CF_DWORD0(ADDR(44));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(3),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_VTX),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));

    /* 4 - ALU clause: 20 instructions starting at slot 14 */
    shader[i++] = CF_ALU_DWORD0(ADDR(14),
                                KCACHE_BANK0(0),
                                KCACHE_BANK1(0),
                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                                KCACHE_ADDR0(0),
                                KCACHE_ADDR1(0),
                                I_COUNT(20),
                                USES_WATERFALL(0),
                                CF_INST(SQ_CF_INST_ALU),
                                WHOLE_QUAD_MODE(0),
                                BARRIER(1));

    /*
     * 5 - dst: export gpr2 as position.
     * NOTE(review): the mask sub exports (5-7) use BURST_COUNT(1) while the
     * equivalent non-mask exports (11, 12) use BURST_COUNT(0) - confirm the
     * difference is intended.
     */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
                                          TYPE(SQ_EXPORT_POS),
                                          RW_GPR(2),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_0),
                                               SRC_SEL_W(SQ_SEL_1),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(1),
                                               END_OF_PROGRAM(0),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(1));
    /* 6 - src: export gpr1 as param 0 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
                                          TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(1),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_0),
                                               SRC_SEL_W(SQ_SEL_1),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(1),
                                               END_OF_PROGRAM(0),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(0));
    /* 7 - mask: export gpr0 as param 1 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
                                          TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_0),
                                               SRC_SEL_W(SQ_SEL_1),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(1),
                                               END_OF_PROGRAM(0),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(0));
    /* 8 - RETURN from the mask sub */
    shader[i++] = CF_DWORD0(ADDR(0));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_RETURN),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));
    /* 9 - non-mask sub: vertex fetch clause, 2 fetches starting at slot 50 */
    shader[i++] = CF_DWORD0(ADDR(50));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(2),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_VTX),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));

    /* 10 - ALU clause: 10 instructions starting at slot 34 */
    shader[i++] = CF_ALU_DWORD0(ADDR(34),
                                KCACHE_BANK0(0),
                                KCACHE_BANK1(0),
                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                                KCACHE_ADDR0(0),
                                KCACHE_ADDR1(0),
                                I_COUNT(10),
                                USES_WATERFALL(0),
                                CF_INST(SQ_CF_INST_ALU),
                                WHOLE_QUAD_MODE(0),
                                BARRIER(1));

    /* 11 - dst: export gpr1 as position */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
                                          TYPE(SQ_EXPORT_POS),
                                          RW_GPR(1),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_0),
                                               SRC_SEL_W(SQ_SEL_1),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(0),
                                               END_OF_PROGRAM(0),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(1));
    /* 12 - src: export gpr0 as param 0 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
                                          TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_0),
                                               SRC_SEL_W(SQ_SEL_1),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(0),
                                               END_OF_PROGRAM(0),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(0));
    /* 13 - RETURN from the non-mask sub */
    shader[i++] = CF_DWORD0(ADDR(0));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_RETURN),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));


    /*
     * Mask sub ALU, slots 14-33.
     * The DOT4 slots come in groups of four (x, y, z, w) closed by LAST(1);
     * exactly one slot per group has WRITE_MASK(1), so each group stores a
     * single component:
     *   14-17  gpr1 . cfile[0] -> gpr3.x      18-21  gpr1 . cfile[1] -> gpr3.y
     *   22-25  gpr0 . cfile[2] -> gpr4.x      26-29  gpr0 . cfile[3] -> gpr4.y
     * The src/mask fetches below load z with SQ_SEL_1 and w with SQ_SEL_0,
     * so (presumably) the dot product reduces to x*c.x + y*c.y + c.z.
     */
    /* 14 srcX.x DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_X),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(3),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /* 15 srcX.y DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Y),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(3),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /* 16 srcX.z DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Z),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Z),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(3),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Z),
                                 CLAMP(0));

    /* 17 srcX.w DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_W),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(3),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_W),
                                 CLAMP(0));

    /* 18 srcY.x DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_X),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(3),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /* 19 srcY.y DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Y),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(3),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /* 20 srcY.z DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Z),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Z),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(3),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Z),
                                 CLAMP(0));

    /* 21 srcY.w DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_W),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(3),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_W),
                                 CLAMP(0));

    /* 22 maskX.x DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_X),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(4),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /* 23 maskX.y DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Y),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(4),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /* 24 maskX.z DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Z),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Z),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(4),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Z),
                                 CLAMP(0));

    /* 25 maskX.w DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_W),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(4),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_W),
                                 CLAMP(0));

    /* 26 maskY.x DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_X),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(4),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /* 27 maskY.y DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Y),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(4),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /* 28 maskY.z DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Z),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Z),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(4),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Z),
                                 CLAMP(0));

    /* 29 maskY.w DOT4 - mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_W),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(4),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_W),
                                 CLAMP(0));

    /*
     * 30-33: multiply each DOT4 result by the .w element of the matching
     * cfile register and store it where exports 6/7 read it (gpr1 = src,
     * gpr0 = mask).  The "/ w" and "/ h" labels suggest that .w holds a
     * reciprocal width/height - presumably set up by the caller; confirm.
     */
    /* 30 srcX / w */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(1),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /* 31 srcY / h */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(1),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /* 32 maskX / w */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /* 33 maskY / h */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /*
     * Non-mask sub ALU, slots 34-43.  Same scheme as above with a single
     * coordinate set: 34-37 gpr0 . cfile[0] -> gpr2.x, 38-41 gpr0 . cfile[1]
     * -> gpr2.y, then 42/43 scale by cfile[0].w / cfile[1].w into gpr0.xy,
     * which export 12 reads.
     */
    /* 34 srcX.x DOT4 - non-mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_X),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /* 35 srcX.y DOT4 - non-mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Y),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /* 36 srcX.z DOT4 - non-mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Z),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Z),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Z),
                                 CLAMP(0));

    /* 37 srcX.w DOT4 - non-mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_W),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_W),
                                 CLAMP(0));

    /* 38 srcY.x DOT4 - non-mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_X),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /* 39 srcY.y DOT4 - non-mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Y),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /* 40 srcY.z DOT4 - non-mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Z),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Z),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Z),
                                 CLAMP(0));

    /* 41 srcY.w DOT4 - non-mask */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_W),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(0),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_DOT4),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_W),
                                 CLAMP(0));

    /* 42 srcX / w */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(0));

    /* 43 srcY / h */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_AR_X),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(0));

    /*
     * Vertex fetches.  Every fetch reads FMT_32_32_FLOAT from BUFFER_ID(0)
     * and occupies four words (two slots) including the pad word.  The mask
     * sub reads dst, src and mask at OFFSET 0, 8 and 16; the non-mask sub
     * reads dst and src at OFFSET 0 and 8.  Only the first fetch of each sub
     * sets MEGA_FETCH(1), with a MEGA_FETCH_COUNT (24 / 16) that equals the
     * sum of the 8-unit elements read by that sub.
     */
    /* 44/45 - dst - mask */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(24));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
                                 DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0),
                                 DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0),
                             ENDIAN_SWAP(ENDIAN_NONE),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(1));
    shader[i++] = VTX_DWORD_PAD;
    /* 46/47 - src */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
                                 DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_1),
                                 DST_SEL_W(SQ_SEL_0),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(8),
                             ENDIAN_SWAP(ENDIAN_NONE),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(0));
    shader[i++] = VTX_DWORD_PAD;
    /* 48/49 - mask */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
                                 DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_1),
                                 DST_SEL_W(SQ_SEL_0),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(16),
                             ENDIAN_SWAP(ENDIAN_NONE),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(0));
    shader[i++] = VTX_DWORD_PAD;

    /* 50/51 - dst - non-mask */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(16));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
                                 DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0),
                                 DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0),
                             ENDIAN_SWAP(ENDIAN_NONE),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(1));
    shader[i++] = VTX_DWORD_PAD;
    /* 52/53 - src */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0),
                             BUFFER_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
                                 DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X),
                                 DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_1),
                                 DST_SEL_W(SQ_SEL_0),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(8),
                             ENDIAN_SWAP(ENDIAN_NONE),
                             CONST_BUF_NO_STRIDE(0),
                             MEGA_FETCH(0));
    shader[i++] = VTX_DWORD_PAD;

    return i;
}

/* comp ps --------------------------------------- */
/*
 * Write the composite pixel shader into shader[] and return the number of
 * 32-bit words stored (40 for this program).
 *
 * The numbers in the comments below are 64-bit slot indices, i.e. two words
 * of shader[] per slot; the ADDR() operands of the CF instructions use the
 * same numbering:
 *    0-9    control flow (dispatch, mask sub, non-mask sub)
 *   10-13   ALU clause of the mask sub
 *   14-17   texture fetches of the mask sub (two slots each)
 *   18-19   texture fetch of the non-mask sub
 * Adding or removing any store therefore requires re-checking every ADDR()
 * and I_COUNT() that follows it.
 *
 * ChipSet is only forwarded to ALU_DWORD1_OP2().  No bounds are checked:
 * the caller must provide room for every word written.
 */
int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
{
    int i = 0;

    /* 0 - CALL slot 3 (mask sub) under COND_BOOL on CF_CONST(0) */
    shader[i++] = CF_DWORD0(ADDR(3));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_BOOL),
                            I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_CALL),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(0));
    /* 1 - CALL slot 7 (non-mask sub) under COND_NOT_BOOL on CF_CONST(0) */
    shader[i++] = CF_DWORD0(ADDR(7));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_NOT_BOOL),
                            I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_CALL),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(0));
    /* 2 - NOP that carries END_OF_PROGRAM(1) */
    shader[i++] = CF_DWORD0(ADDR(0));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(1),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_NOP),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));

    /* 3 - mask sub: texture clause, 2 fetches starting at slot 14 */
    shader[i++] = CF_DWORD0(ADDR(14));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(2),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_TEX),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));

    /* 4 - ALU clause: 4 instructions starting at slot 10 */
    shader[i++] = CF_ALU_DWORD0(ADDR(10),
                                KCACHE_BANK0(0),
                                KCACHE_BANK1(0),
                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                                KCACHE_ADDR0(0),
                                KCACHE_ADDR1(0),
                                I_COUNT(4),
                                USES_WATERFALL(0),
                                CF_INST(SQ_CF_INST_ALU),
                                WHOLE_QUAD_MODE(0),
                                BARRIER(1));

    /* 5 - export gpr2 (the product written by slots 10-13) to MRT0 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
                                          TYPE(SQ_EXPORT_PIXEL),
                                          RW_GPR(2),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(1));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z),
                                               SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(1),
                                               END_OF_PROGRAM(0),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(1));
    /* 6 - RETURN from the mask sub */
    shader[i++] = CF_DWORD0(ADDR(0));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_RETURN),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));

    /* 7 non-mask sub: texture clause, 1 fetch at slot 18 */
    shader[i++] = CF_DWORD0(ADDR(18));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(1),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_TEX),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));
    /* 8 - export gpr0 (the sampled src texel) to MRT0 */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
                                          TYPE(SQ_EXPORT_PIXEL),
                                          RW_GPR(0),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(1));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_Z),
                                               SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(1),
                                               END_OF_PROGRAM(0),
                                               VALID_PIXEL_MODE(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(1));
    /* 9 - RETURN from the non-mask sub */
    shader[i++] = CF_DWORD0(ADDR(0));
    shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
                            I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
                            CF_INST(SQ_CF_INST_RETURN),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));

    /*
     * Slots 10-13 form one ALU group (LAST(1) only on slot 13): a
     * per-component multiply of the mask texel (gpr1) by the src texel
     * (gpr0) into gpr2, each with CLAMP(1).
     */
    /* 10 - alu 0 */
    /* MUL gpr[2].x gpr[1].x gpr[0].x */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_X),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(1));
    /* 11 - alu 1 */
    /* MUL gpr[2].y gpr[1].y gpr[0].y */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Y),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(1));
    /* 12 - alu 2 */
    /* MUL gpr[2].z gpr[1].z gpr[0].z */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Z),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_Z),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(0));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Z),
                                 CLAMP(1));
    /* 13 - alu 3 */
    /* MUL gpr[2].w gpr[1].w gpr[0].w */
    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_W),
                             SRC0_NEG(0),
                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
                             SRC1_REL(ABSOLUTE),
                             SRC1_ELEM(ELEM_W),
                             SRC1_NEG(0),
                             INDEX_MODE(SQ_INDEX_LOOP),
                             PRED_SEL(SQ_PRED_SEL_OFF),
                             LAST(1));
    shader[i++] = ALU_DWORD1_OP2(ChipSet,
                                 SRC0_ABS(0),
                                 SRC1_ABS(0),
                                 UPDATE_EXECUTE_MASK(0),
                                 UPDATE_PRED(0),
                                 WRITE_MASK(1),
                                 FOG_MERGE(0),
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
                                 DST_GPR(2),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_W),
                                 CLAMP(1));

    /*
     * Texture fetches.  Each SAMPLE occupies four words (two slots)
     * including the pad word, takes x/y from its source GPR with z forced
     * to SQ_SEL_0 and w to SQ_SEL_1, and overwrites that same GPR with the
     * texel: resource/sampler 0 uses gpr0 (src), resource/sampler 1 uses
     * gpr1 (mask).
     */
    /* 14/15 - src - mask */
    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
                             BC_FRAC_MODE(0),
                             FETCH_WHOLE_QUAD(0),
                             RESOURCE_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             R7xx_ALT_CONST(0));
    shader[i++] = TEX_DWORD1(DST_GPR(0),
                             DST_REL(ABSOLUTE),
                             DST_SEL_X(SQ_SEL_X),
                             DST_SEL_Y(SQ_SEL_Y),
                             DST_SEL_Z(SQ_SEL_Z),
                             DST_SEL_W(SQ_SEL_W),
                             LOD_BIAS(0),
                             COORD_TYPE_X(TEX_NORMALIZED),
                             COORD_TYPE_Y(TEX_NORMALIZED),
                             COORD_TYPE_Z(TEX_NORMALIZED),
                             COORD_TYPE_W(TEX_NORMALIZED));
    shader[i++] = TEX_DWORD2(OFFSET_X(0),
                             OFFSET_Y(0),
                             OFFSET_Z(0),
                             SAMPLER_ID(0),
                             SRC_SEL_X(SQ_SEL_X),
                             SRC_SEL_Y(SQ_SEL_Y),
                             SRC_SEL_Z(SQ_SEL_0),
                             SRC_SEL_W(SQ_SEL_1));
    shader[i++] = TEX_DWORD_PAD;
    /* 16/17 - mask */
    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
                             BC_FRAC_MODE(0),
                             FETCH_WHOLE_QUAD(0),
                             RESOURCE_ID(1),
                             SRC_GPR(1),
                             SRC_REL(ABSOLUTE),
                             R7xx_ALT_CONST(0));
    shader[i++] = TEX_DWORD1(DST_GPR(1),
                             DST_REL(ABSOLUTE),
                             DST_SEL_X(SQ_SEL_X),
                             DST_SEL_Y(SQ_SEL_Y),
                             DST_SEL_Z(SQ_SEL_Z),
                             DST_SEL_W(SQ_SEL_W),
                             LOD_BIAS(0),
                             COORD_TYPE_X(TEX_NORMALIZED),
                             COORD_TYPE_Y(TEX_NORMALIZED),
                             COORD_TYPE_Z(TEX_NORMALIZED),
                             COORD_TYPE_W(TEX_NORMALIZED));
    shader[i++] = TEX_DWORD2(OFFSET_X(0),
                             OFFSET_Y(0),
                             OFFSET_Z(0),
                             SAMPLER_ID(1),
                             SRC_SEL_X(SQ_SEL_X),
                             SRC_SEL_Y(SQ_SEL_Y),
                             SRC_SEL_Z(SQ_SEL_0),
                             SRC_SEL_W(SQ_SEL_1));
    shader[i++] = TEX_DWORD_PAD;

    /* 18/19 - src - non-mask */
    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
                             BC_FRAC_MODE(0),
                             FETCH_WHOLE_QUAD(0),
                             RESOURCE_ID(0),
                             SRC_GPR(0),
                             SRC_REL(ABSOLUTE),
                             R7xx_ALT_CONST(0));
    shader[i++] = TEX_DWORD1(DST_GPR(0),
                             DST_REL(ABSOLUTE),
                             DST_SEL_X(SQ_SEL_X),
                             DST_SEL_Y(SQ_SEL_Y),
                             DST_SEL_Z(SQ_SEL_Z),
                             DST_SEL_W(SQ_SEL_W),
                             LOD_BIAS(0),
                             COORD_TYPE_X(TEX_NORMALIZED),
                             COORD_TYPE_Y(TEX_NORMALIZED),
                             COORD_TYPE_Z(TEX_NORMALIZED),
                             COORD_TYPE_W(TEX_NORMALIZED));
    shader[i++] = TEX_DWORD2(OFFSET_X(0),
                             OFFSET_Y(0),
                             OFFSET_Z(0),
                             SAMPLER_ID(0),
                             SRC_SEL_X(SQ_SEL_X),
                             SRC_SEL_Y(SQ_SEL_Y),
                             SRC_SEL_Z(SQ_SEL_0),
                             SRC_SEL_W(SQ_SEL_1));
    shader[i++] = TEX_DWORD_PAD;

    return i;
}