1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Authors: Alex Deucher <alexander.deucher@amd.com> 24 * Matthias Hopf <mhopf@suse.de> 25 */ 26#ifdef HAVE_CONFIG_H 27#include "config.h" 28#endif 29 30#include "xf86.h" 31 32#include <errno.h> 33 34#include "radeon.h" 35#include "r600_shader.h" 36#include "radeon_reg.h" 37#include "r600_reg.h" 38#include "r600_state.h" 39 40#include "radeon_vbo.h" 41#include "radeon_exa_shared.h" 42 43static const uint32_t R600_ROP[16] = { 44 RADEON_ROP3_ZERO, /* GXclear */ 45 RADEON_ROP3_DSa, /* Gxand */ 46 RADEON_ROP3_SDna, /* GXandReverse */ 47 RADEON_ROP3_S, /* GXcopy */ 48 RADEON_ROP3_DSna, /* GXandInverted */ 49 RADEON_ROP3_D, /* GXnoop */ 50 RADEON_ROP3_DSx, /* GXxor */ 51 RADEON_ROP3_DSo, /* GXor */ 52 RADEON_ROP3_DSon, /* GXnor */ 53 RADEON_ROP3_DSxn, /* GXequiv */ 54 RADEON_ROP3_Dn, /* GXinvert */ 55 RADEON_ROP3_SDno, /* GXorReverse */ 56 RADEON_ROP3_Sn, /* GXcopyInverted */ 57 RADEON_ROP3_DSno, /* GXorInverted */ 58 RADEON_ROP3_DSan, /* GXnand */ 59 RADEON_ROP3_ONE, /* GXset */ 60}; 61 62/* we try and batch operations together under KMS - 63 but it doesn't work yet without misrendering */ 64#define KMS_MULTI_OP 1 65 66/* Flush the indirect buffer to the kernel for submission to the card */ 67void R600CPFlushIndirect(ScrnInfoPtr pScrn) 68{ 69 radeon_cs_flush_indirect(pScrn); 70} 71 72void R600IBDiscard(ScrnInfoPtr pScrn) 73{ 74 radeon_ib_discard(pScrn); 75} 76 77void 78r600_wait_3d_idle_clean(ScrnInfoPtr pScrn) 79{ 80 RADEONInfoPtr info = RADEONPTR(pScrn); 81 82 //flush caches, don't generate timestamp 83 BEGIN_BATCH(5); 84 PACK3(IT_EVENT_WRITE, 1); 85 E32(CACHE_FLUSH_AND_INV_EVENT); 86 // wait for 3D idle clean 87 EREG(WAIT_UNTIL, (WAIT_3D_IDLE_bit | 88 WAIT_3D_IDLECLEAN_bit)); 89 END_BATCH(); 90} 91 92void 93r600_wait_3d_idle(ScrnInfoPtr pScrn) 94{ 95 RADEONInfoPtr info = RADEONPTR(pScrn); 96 97 BEGIN_BATCH(3); 98 EREG(WAIT_UNTIL, WAIT_3D_IDLE_bit); 99 END_BATCH(); 100} 101 102void 103r600_start_3d(ScrnInfoPtr pScrn) 104{ 105 RADEONInfoPtr info = 
RADEONPTR(pScrn); 106 107 if (info->ChipFamily < CHIP_FAMILY_RV770) { 108 BEGIN_BATCH(5); 109 PACK3(IT_START_3D_CMDBUF, 1); 110 E32(0); 111 } else 112 BEGIN_BATCH(3); 113 114 PACK3(IT_CONTEXT_CONTROL, 2); 115 E32(0x80000000); 116 E32(0x80000000); 117 END_BATCH(); 118 119} 120 121/* 122 * Setup of functional groups 123 */ 124 125// asic stack/thread/gpr limits - need to query the drm 126static void 127r600_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf) 128{ 129 uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 130 uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; 131 RADEONInfoPtr info = RADEONPTR(pScrn); 132 133 if ((info->ChipFamily == CHIP_FAMILY_RV610) || 134 (info->ChipFamily == CHIP_FAMILY_RV620) || 135 (info->ChipFamily == CHIP_FAMILY_RS780) || 136 (info->ChipFamily == CHIP_FAMILY_RS880) || 137 (info->ChipFamily == CHIP_FAMILY_RV710)) 138 sq_config = 0; // no VC 139 else 140 sq_config = VC_ENABLE_bit; 141 142 sq_config |= (DX9_CONSTS_bit | 143 ALU_INST_PREFER_VECTOR_bit | 144 (sq_conf->ps_prio << PS_PRIO_shift) | 145 (sq_conf->vs_prio << VS_PRIO_shift) | 146 (sq_conf->gs_prio << GS_PRIO_shift) | 147 (sq_conf->es_prio << ES_PRIO_shift)); 148 149 sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 150 (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 151 (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 152 sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 153 (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 154 155 sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 156 (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 157 (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 158 (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 159 160 sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 161 (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 162 163 sq_stack_resource_mgmt_2 = 
((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 164 (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 165 166 BEGIN_BATCH(8); 167 PACK0(SQ_CONFIG, 6); 168 E32(sq_config); 169 E32(sq_gpr_resource_mgmt_1); 170 E32(sq_gpr_resource_mgmt_2); 171 E32(sq_thread_resource_mgmt); 172 E32(sq_stack_resource_mgmt_1); 173 E32(sq_stack_resource_mgmt_2); 174 END_BATCH(); 175} 176 177void r600_set_blend_color(ScrnInfoPtr pScrn, float *color) 178{ 179 RADEONInfoPtr info = RADEONPTR(pScrn); 180 181 BEGIN_BATCH(2 + 4); 182 PACK0(CB_BLEND_RED, 4); 183 EFLOAT(color[0]); /* R */ 184 EFLOAT(color[1]); /* G */ 185 EFLOAT(color[2]); /* B */ 186 EFLOAT(color[3]); /* A */ 187 END_BATCH(); 188} 189 190 191void 192r600_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain) 193{ 194 uint32_t cb_color_info, cb_color_control; 195 unsigned pitch, slice, h, array_mode; 196 RADEONInfoPtr info = RADEONPTR(pScrn); 197 198 199 if (cb_conf->surface) { 200 switch (cb_conf->surface->level[0].mode) { 201 case RADEON_SURF_MODE_1D: 202 array_mode = 2; 203 break; 204 case RADEON_SURF_MODE_2D: 205 array_mode = 4; 206 break; 207 default: 208 array_mode = 0; 209 break; 210 } 211 pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1; 212 slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1; 213 } else 214 { 215 array_mode = cb_conf->array_mode; 216 pitch = (cb_conf->w / 8) - 1; 217 h = RADEON_ALIGN(cb_conf->h, 8); 218 slice = ((cb_conf->w * h) / 64) - 1; 219 } 220 221 cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 222 (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 223 (array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 224 (cb_conf->number_type << NUMBER_TYPE_shift) | 225 (cb_conf->comp_swap << COMP_SWAP_shift) | 226 (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); 227 if (cb_conf->read_size) 228 cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; 229 if (cb_conf->blend_clamp) 230 cb_color_info |= 
BLEND_CLAMP_bit; 231 if (cb_conf->clear_color) 232 cb_color_info |= CLEAR_COLOR_bit; 233 if (cb_conf->blend_bypass) 234 cb_color_info |= BLEND_BYPASS_bit; 235 if (cb_conf->blend_float32) 236 cb_color_info |= BLEND_FLOAT32_bit; 237 if (cb_conf->simple_float) 238 cb_color_info |= SIMPLE_FLOAT_bit; 239 if (cb_conf->round_mode) 240 cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 241 if (cb_conf->tile_compact) 242 cb_color_info |= TILE_COMPACT_bit; 243 if (cb_conf->source_format) 244 cb_color_info |= SOURCE_FORMAT_bit; 245 246 BEGIN_BATCH(3 + 2); 247 EREG((CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); 248 RELOC_BATCH(cb_conf->bo, 0, domain); 249 END_BATCH(); 250 251 // rv6xx workaround 252 if ((info->ChipFamily > CHIP_FAMILY_R600) && 253 (info->ChipFamily < CHIP_FAMILY_RV770)) { 254 BEGIN_BATCH(2); 255 PACK3(IT_SURFACE_BASE_UPDATE, 1); 256 E32((2 << cb_conf->id)); 257 END_BATCH(); 258 } 259 /* Set CMASK & TILE buffer to the offset of color buffer as 260 * we don't use those this shouldn't cause any issue and we 261 * then have a valid cmd stream 262 */ 263 BEGIN_BATCH(3 + 2); 264 EREG((CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 265 RELOC_BATCH(cb_conf->bo, 0, domain); 266 END_BATCH(); 267 BEGIN_BATCH(3 + 2); 268 EREG((CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 269 RELOC_BATCH(cb_conf->bo, 0, domain); 270 END_BATCH(); 271 BEGIN_BATCH(9); 272 // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib 273 EREG((CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | 274 (slice << SLICE_TILE_MAX_shift))); 275 EREG((CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | 276 (0 << SLICE_MAX_shift))); 277 EREG((CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | 278 (0 << FMASK_TILE_MAX_shift))); 279 END_BATCH(); 280 281 BEGIN_BATCH(3 + 2); 282 EREG((CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); 283 RELOC_BATCH(cb_conf->bo, 
0, domain); 284 END_BATCH(); 285 286 BEGIN_BATCH(9); 287 EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift)); 288 cb_color_control = R600_ROP[cb_conf->rop] | 289 (cb_conf->blend_enable << TARGET_BLEND_ENABLE_shift); 290 if (info->ChipFamily == CHIP_FAMILY_R600) { 291 /* no per-MRT blend on R600 */ 292 EREG(CB_COLOR_CONTROL, cb_color_control); 293 EREG(CB_BLEND_CONTROL, cb_conf->blendcntl); 294 } else { 295 if (cb_conf->blend_enable) 296 cb_color_control |= PER_MRT_BLEND_bit; 297 EREG(CB_COLOR_CONTROL, cb_color_control); 298 EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); 299 } 300 END_BATCH(); 301} 302 303static void 304r600_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type, 305 uint32_t size, uint64_t mc_addr, 306 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) 307{ 308 RADEONInfoPtr info = RADEONPTR(pScrn); 309 uint32_t cp_coher_size; 310 if (size == 0xffffffff) 311 cp_coher_size = 0xffffffff; 312 else 313 cp_coher_size = ((size + 255) >> 8); 314 315 BEGIN_BATCH(5 + 2); 316 PACK3(IT_SURFACE_SYNC, 4); 317 E32(sync_type); 318 E32(cp_coher_size); 319 E32((mc_addr >> 8)); 320 E32(10); /* poll interval */ 321 RELOC_BATCH(bo, rdomains, wdomain); 322 END_BATCH(); 323} 324 325/* inserts a wait for vline in the command stream */ 326void 327r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, 328 xf86CrtcPtr crtc, int start, int stop) 329{ 330 RADEONInfoPtr info = RADEONPTR(pScrn); 331 drmmode_crtc_private_ptr drmmode_crtc; 332 333 if (!crtc) 334 return; 335 336 if (!crtc->enabled) 337 return; 338 339 if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) 340 return; 341 342 start = max(start, crtc->y); 343 stop = min(stop, crtc->y + crtc->mode.VDisplay); 344 345 if (start >= stop) 346 return; 347 348 drmmode_crtc = crtc->driver_private; 349 350 BEGIN_BATCH(11); 351 /* set the VLINE range */ 352 EREG(AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */ 353 (start << AVIVO_D1MODE_VLINE_START_SHIFT) | 354 (stop << 
AVIVO_D1MODE_VLINE_END_SHIFT)); 355 356 /* tell the CP to poll the VLINE state register */ 357 PACK3(IT_WAIT_REG_MEM, 6); 358 E32(IT_WAIT_REG | IT_WAIT_EQ); 359 E32(IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS)); 360 E32(0); 361 E32(0); // Ref value 362 E32(AVIVO_D1MODE_VLINE_STAT); // Mask 363 E32(10); // Wait interval 364 /* add crtc reloc */ 365 PACK3(IT_NOP, 1); 366 E32(drmmode_crtc->mode_crtc->crtc_id); 367 END_BATCH(); 368} 369 370void 371r600_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp) 372{ 373 RADEONInfoPtr info = RADEONPTR(pScrn); 374 375 BEGIN_BATCH(8); 376 /* Interpolator setup */ 377 EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift)); 378 PACK0(SPI_PS_IN_CONTROL_0, 3); 379 E32((num_interp << NUM_INTERP_shift)); 380 E32(0); 381 E32(0); 382 END_BATCH(); 383} 384 385void 386r600_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain) 387{ 388 RADEONInfoPtr info = RADEONPTR(pScrn); 389 uint32_t sq_pgm_resources; 390 391 sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 392 (fs_conf->stack_size << STACK_SIZE_shift)); 393 394 if (fs_conf->dx10_clamp) 395 sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; 396 397 BEGIN_BATCH(3 + 2); 398 EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 399 RELOC_BATCH(fs_conf->bo, domain, 0); 400 END_BATCH(); 401 402 BEGIN_BATCH(6); 403 EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources); 404 EREG(SQ_PGM_CF_OFFSET_FS, 0); 405 END_BATCH(); 406} 407 408void 409r600_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain) 410{ 411 RADEONInfoPtr info = RADEONPTR(pScrn); 412 uint32_t sq_pgm_resources; 413 414 sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 415 (vs_conf->stack_size << STACK_SIZE_shift)); 416 417 if (vs_conf->dx10_clamp) 418 sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; 419 if (vs_conf->fetch_cache_lines) 420 sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 421 if (vs_conf->uncached_first_inst) 422 
sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 423 424 /* flush SQ cache */ 425 r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 426 vs_conf->shader_size, vs_conf->shader_addr, 427 vs_conf->bo, domain, 0); 428 429 BEGIN_BATCH(3 + 2); 430 EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 431 RELOC_BATCH(vs_conf->bo, domain, 0); 432 END_BATCH(); 433 434 BEGIN_BATCH(6); 435 EREG(SQ_PGM_RESOURCES_VS, sq_pgm_resources); 436 EREG(SQ_PGM_CF_OFFSET_VS, 0); 437 END_BATCH(); 438} 439 440void 441r600_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain) 442{ 443 RADEONInfoPtr info = RADEONPTR(pScrn); 444 uint32_t sq_pgm_resources; 445 446 sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 447 (ps_conf->stack_size << STACK_SIZE_shift)); 448 449 if (ps_conf->dx10_clamp) 450 sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; 451 if (ps_conf->fetch_cache_lines) 452 sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 453 if (ps_conf->uncached_first_inst) 454 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 455 if (ps_conf->clamp_consts) 456 sq_pgm_resources |= CLAMP_CONSTS_bit; 457 458 /* flush SQ cache */ 459 r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 460 ps_conf->shader_size, ps_conf->shader_addr, 461 ps_conf->bo, domain, 0); 462 463 BEGIN_BATCH(3 + 2); 464 EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 465 RELOC_BATCH(ps_conf->bo, domain, 0); 466 END_BATCH(); 467 468 BEGIN_BATCH(9); 469 EREG(SQ_PGM_RESOURCES_PS, sq_pgm_resources); 470 EREG(SQ_PGM_EXPORTS_PS, ps_conf->export_mode); 471 EREG(SQ_PGM_CF_OFFSET_PS, 0); 472 END_BATCH(); 473} 474 475void 476r600_set_alu_consts(ScrnInfoPtr pScrn, int offset, int count, float *const_buf) 477{ 478 RADEONInfoPtr info = RADEONPTR(pScrn); 479 int i; 480 const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); 481 482 BEGIN_BATCH(2 + countreg); 483 PACK0(SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); 484 for (i = 0; i < countreg; i++) 485 EFLOAT(const_buf[i]); 
486 END_BATCH(); 487} 488 489void 490r600_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val) 491{ 492 RADEONInfoPtr info = RADEONPTR(pScrn); 493 /* bool register order is: ps, vs, gs; one register each 494 * 1 bits per bool; 32 bools each for ps, vs, gs. 495 */ 496 BEGIN_BATCH(3); 497 EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 498 END_BATCH(); 499} 500 501static void 502r600_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain) 503{ 504 RADEONInfoPtr info = RADEONPTR(pScrn); 505 struct radeon_accel_state *accel_state = info->accel_state; 506 uint32_t sq_vtx_constant_word2; 507 508 sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 509 ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 510 (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 511 (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 512 (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 513 if (res->clamp_x) 514 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 515 516 if (res->format_comp_all) 517 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 518 519 if (res->srf_mode_all) 520 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 521 522 /* flush vertex cache */ 523 if ((info->ChipFamily == CHIP_FAMILY_RV610) || 524 (info->ChipFamily == CHIP_FAMILY_RV620) || 525 (info->ChipFamily == CHIP_FAMILY_RS780) || 526 (info->ChipFamily == CHIP_FAMILY_RS880) || 527 (info->ChipFamily == CHIP_FAMILY_RV710)) 528 r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 529 accel_state->vbo.vb_offset, 0, 530 res->bo, 531 domain, 0); 532 else 533 r600_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit, 534 accel_state->vbo.vb_offset, 0, 535 res->bo, 536 domain, 0); 537 538 BEGIN_BATCH(9 + 2); 539 PACK0(SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); 540 E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 541 E32((res->vtx_num_entries << 2) - 1); 
// 1: SIZE 542 E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 543 E32(res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? 544 E32(0); // 4: n/a 545 E32(0); // 5: n/a 546 E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE 547 RELOC_BATCH(res->bo, domain, 0); 548 END_BATCH(); 549} 550 551void 552r600_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain) 553{ 554 RADEONInfoPtr info = RADEONPTR(pScrn); 555 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 556 uint32_t sq_tex_resource_word5, sq_tex_resource_word6; 557 uint32_t array_mode, pitch; 558 559 if (tex_res->surface) { 560 switch (tex_res->surface->level[0].mode) { 561 case RADEON_SURF_MODE_1D: 562 array_mode = 2; 563 break; 564 case RADEON_SURF_MODE_2D: 565 array_mode = 4; 566 break; 567 default: 568 array_mode = 0; 569 break; 570 } 571 pitch = tex_res->surface->level[0].nblk_x >> 3; 572 } else 573 { 574 array_mode = tex_res->tile_mode; 575 pitch = (tex_res->pitch + 7) >> 3; 576 } 577 578 sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | 579 (array_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); 580 581 if (tex_res->w) 582 sq_tex_resource_word0 |= (((pitch - 1) << PITCH_shift) | 583 ((tex_res->w - 1) << TEX_WIDTH_shift)); 584 585 if (tex_res->tile_type) 586 sq_tex_resource_word0 |= TILE_TYPE_bit; 587 588 sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); 589 590 if (tex_res->h) 591 sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 592 if (tex_res->depth) 593 sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 594 595 sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 596 (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 597 (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 598 (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 599 (tex_res->num_format_all << 
SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 600 (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 601 (tex_res->request_size << REQUEST_SIZE_shift) | 602 (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 603 (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 604 (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 605 (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 606 (tex_res->base_level << BASE_LEVEL_shift)); 607 608 if (tex_res->srf_mode_all) 609 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 610 if (tex_res->force_degamma) 611 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 612 613 sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 614 (tex_res->base_array << BASE_ARRAY_shift) | 615 (tex_res->last_array << LAST_ARRAY_shift)); 616 617 sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | 618 (tex_res->perf_modulation << PERF_MODULATION_shift) | 619 (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); 620 621 if (tex_res->interlaced) 622 sq_tex_resource_word6 |= INTERLACED_bit; 623 624 /* flush texture cache */ 625 r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 626 tex_res->size, tex_res->base, 627 tex_res->bo, domain, 0); 628 629 BEGIN_BATCH(9 + 4); 630 PACK0(SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); 631 E32(sq_tex_resource_word0); 632 E32(sq_tex_resource_word1); 633 E32(((tex_res->base) >> 8)); 634 E32(((tex_res->mip_base) >> 8)); 635 E32(sq_tex_resource_word4); 636 E32(sq_tex_resource_word5); 637 E32(sq_tex_resource_word6); 638 RELOC_BATCH(tex_res->bo, domain, 0); 639 RELOC_BATCH(tex_res->mip_bo, domain, 0); 640 END_BATCH(); 641} 642 643void 644r600_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s) 645{ 646 RADEONInfoPtr info = RADEONPTR(pScrn); 647 uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 648 649 sq_tex_sampler_word0 = 
((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 650 (s->clamp_y << CLAMP_Y_shift) | 651 (s->clamp_z << CLAMP_Z_shift) | 652 (s->xy_mag_filter << XY_MAG_FILTER_shift) | 653 (s->xy_min_filter << XY_MIN_FILTER_shift) | 654 (s->z_filter << Z_FILTER_shift) | 655 (s->mip_filter << MIP_FILTER_shift) | 656 (s->border_color << BORDER_COLOR_TYPE_shift) | 657 (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 658 (s->chroma_key << CHROMA_KEY_shift)); 659 if (s->point_sampling_clamp) 660 sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; 661 if (s->tex_array_override) 662 sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; 663 if (s->lod_uses_minor_axis) 664 sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; 665 666 sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | 667 (s->max_lod << MAX_LOD_shift) | 668 (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); 669 670 sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | 671 (s->perf_mip << PERF_MIP_shift) | 672 (s->perf_z << PERF_Z_shift)); 673 if (s->mc_coord_truncate) 674 sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 675 if (s->force_degamma) 676 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 677 if (s->high_precision_filter) 678 sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; 679 if (s->fetch_4) 680 sq_tex_sampler_word2 |= FETCH_4_bit; 681 if (s->sample_is_pcf) 682 sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; 683 if (s->type) 684 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 685 686 BEGIN_BATCH(5); 687 PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); 688 E32(sq_tex_sampler_word0); 689 E32(sq_tex_sampler_word1); 690 E32(sq_tex_sampler_word2); 691 END_BATCH(); 692} 693 694//XXX deal with clip offsets in clip setup 695void 696r600_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 697{ 698 RADEONInfoPtr info = RADEONPTR(pScrn); 699 700 BEGIN_BATCH(4); 701 PACK0(PA_SC_SCREEN_SCISSOR_TL, 2); 702 E32(((x1 << 
PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 703 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 704 E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | 705 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 706 END_BATCH(); 707} 708 709void 710r600_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 711{ 712 RADEONInfoPtr info = RADEONPTR(pScrn); 713 714 BEGIN_BATCH(4); 715 PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); 716 E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 717 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 718 WINDOW_OFFSET_DISABLE_bit)); 719 E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 720 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 721 END_BATCH(); 722} 723 724void 725r600_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 726{ 727 RADEONInfoPtr info = RADEONPTR(pScrn); 728 729 BEGIN_BATCH(4); 730 PACK0(PA_SC_GENERIC_SCISSOR_TL, 2); 731 E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 732 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 733 WINDOW_OFFSET_DISABLE_bit)); 734 E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 735 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 736 END_BATCH(); 737} 738 739void 740r600_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 741{ 742 RADEONInfoPtr info = RADEONPTR(pScrn); 743 744 BEGIN_BATCH(4); 745 PACK0(PA_SC_WINDOW_SCISSOR_TL, 2); 746 E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 747 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 748 WINDOW_OFFSET_DISABLE_bit)); 749 E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 750 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 751 END_BATCH(); 752} 753 754void 755r600_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 756{ 757 RADEONInfoPtr info = RADEONPTR(pScrn); 758 759 BEGIN_BATCH(4); 760 PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2); 761 E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 762 (y1 << 
PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 763 E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | 764 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 765 END_BATCH(); 766} 767 768/* 769 * Setup of default state 770 */ 771 772void 773r600_set_default_state(ScrnInfoPtr pScrn) 774{ 775 tex_resource_t tex_res; 776 shader_config_t fs_conf; 777 sq_config_t sq_conf; 778 int i; 779 RADEONInfoPtr info = RADEONPTR(pScrn); 780 struct radeon_accel_state *accel_state = info->accel_state; 781 782 if (accel_state->XInited3D) 783 return; 784 785 memset(&tex_res, 0, sizeof(tex_resource_t)); 786 memset(&fs_conf, 0, sizeof(shader_config_t)); 787 788 accel_state->XInited3D = TRUE; 789 790 r600_start_3d(pScrn); 791 792 // SQ 793 sq_conf.ps_prio = 0; 794 sq_conf.vs_prio = 1; 795 sq_conf.gs_prio = 2; 796 sq_conf.es_prio = 3; 797 // need to set stack/thread/gpr limits based on the asic 798 // for now just set them low enough so any card will work 799 // see r600_cp.c in the drm 800 switch (info->ChipFamily) { 801 case CHIP_FAMILY_R600: 802 sq_conf.num_ps_gprs = 192; 803 sq_conf.num_vs_gprs = 56; 804 sq_conf.num_temp_gprs = 4; 805 sq_conf.num_gs_gprs = 0; 806 sq_conf.num_es_gprs = 0; 807 sq_conf.num_ps_threads = 136; 808 sq_conf.num_vs_threads = 48; 809 sq_conf.num_gs_threads = 4; 810 sq_conf.num_es_threads = 4; 811 sq_conf.num_ps_stack_entries = 128; 812 sq_conf.num_vs_stack_entries = 128; 813 sq_conf.num_gs_stack_entries = 0; 814 sq_conf.num_es_stack_entries = 0; 815 break; 816 case CHIP_FAMILY_RV630: 817 case CHIP_FAMILY_RV635: 818 sq_conf.num_ps_gprs = 84; 819 sq_conf.num_vs_gprs = 36; 820 sq_conf.num_temp_gprs = 4; 821 sq_conf.num_gs_gprs = 0; 822 sq_conf.num_es_gprs = 0; 823 sq_conf.num_ps_threads = 144; 824 sq_conf.num_vs_threads = 40; 825 sq_conf.num_gs_threads = 4; 826 sq_conf.num_es_threads = 4; 827 sq_conf.num_ps_stack_entries = 40; 828 sq_conf.num_vs_stack_entries = 40; 829 sq_conf.num_gs_stack_entries = 32; 830 sq_conf.num_es_stack_entries = 16; 831 break; 832 case CHIP_FAMILY_RV610: 833 case 
CHIP_FAMILY_RV620: 834 case CHIP_FAMILY_RS780: 835 case CHIP_FAMILY_RS880: 836 default: 837 sq_conf.num_ps_gprs = 84; 838 sq_conf.num_vs_gprs = 36; 839 sq_conf.num_temp_gprs = 4; 840 sq_conf.num_gs_gprs = 0; 841 sq_conf.num_es_gprs = 0; 842 sq_conf.num_ps_threads = 136; 843 sq_conf.num_vs_threads = 48; 844 sq_conf.num_gs_threads = 4; 845 sq_conf.num_es_threads = 4; 846 sq_conf.num_ps_stack_entries = 40; 847 sq_conf.num_vs_stack_entries = 40; 848 sq_conf.num_gs_stack_entries = 32; 849 sq_conf.num_es_stack_entries = 16; 850 break; 851 case CHIP_FAMILY_RV670: 852 sq_conf.num_ps_gprs = 144; 853 sq_conf.num_vs_gprs = 40; 854 sq_conf.num_temp_gprs = 4; 855 sq_conf.num_gs_gprs = 0; 856 sq_conf.num_es_gprs = 0; 857 sq_conf.num_ps_threads = 136; 858 sq_conf.num_vs_threads = 48; 859 sq_conf.num_gs_threads = 4; 860 sq_conf.num_es_threads = 4; 861 sq_conf.num_ps_stack_entries = 40; 862 sq_conf.num_vs_stack_entries = 40; 863 sq_conf.num_gs_stack_entries = 32; 864 sq_conf.num_es_stack_entries = 16; 865 break; 866 case CHIP_FAMILY_RV770: 867 sq_conf.num_ps_gprs = 192; 868 sq_conf.num_vs_gprs = 56; 869 sq_conf.num_temp_gprs = 4; 870 sq_conf.num_gs_gprs = 0; 871 sq_conf.num_es_gprs = 0; 872 sq_conf.num_ps_threads = 188; 873 sq_conf.num_vs_threads = 60; 874 sq_conf.num_gs_threads = 0; 875 sq_conf.num_es_threads = 0; 876 sq_conf.num_ps_stack_entries = 256; 877 sq_conf.num_vs_stack_entries = 256; 878 sq_conf.num_gs_stack_entries = 0; 879 sq_conf.num_es_stack_entries = 0; 880 break; 881 case CHIP_FAMILY_RV730: 882 case CHIP_FAMILY_RV740: 883 sq_conf.num_ps_gprs = 84; 884 sq_conf.num_vs_gprs = 36; 885 sq_conf.num_temp_gprs = 4; 886 sq_conf.num_gs_gprs = 0; 887 sq_conf.num_es_gprs = 0; 888 sq_conf.num_ps_threads = 188; 889 sq_conf.num_vs_threads = 60; 890 sq_conf.num_gs_threads = 0; 891 sq_conf.num_es_threads = 0; 892 sq_conf.num_ps_stack_entries = 128; 893 sq_conf.num_vs_stack_entries = 128; 894 sq_conf.num_gs_stack_entries = 0; 895 sq_conf.num_es_stack_entries = 0; 896 break; 897 case 
CHIP_FAMILY_RV710: 898 sq_conf.num_ps_gprs = 192; 899 sq_conf.num_vs_gprs = 56; 900 sq_conf.num_temp_gprs = 4; 901 sq_conf.num_gs_gprs = 0; 902 sq_conf.num_es_gprs = 0; 903 sq_conf.num_ps_threads = 144; 904 sq_conf.num_vs_threads = 48; 905 sq_conf.num_gs_threads = 0; 906 sq_conf.num_es_threads = 0; 907 sq_conf.num_ps_stack_entries = 128; 908 sq_conf.num_vs_stack_entries = 128; 909 sq_conf.num_gs_stack_entries = 0; 910 sq_conf.num_es_stack_entries = 0; 911 break; 912 } 913 914 r600_sq_setup(pScrn, &sq_conf); 915 916 /* set fake reloc for unused depth */ 917 BEGIN_BATCH(3 + 2); 918 EREG(DB_DEPTH_INFO, 0); 919 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 920 END_BATCH(); 921 922 BEGIN_BATCH(80); 923 if (info->ChipFamily < CHIP_FAMILY_RV770) { 924 EREG(TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | 925 (28 << TD_FIFO_CREDIT_shift))); 926 EREG(VC_ENHANCE, 0); 927 EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 928 EREG(DB_DEBUG, 0x82000000); /* ? */ 929 EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 930 (16 << DEPTH_FLUSH_shift) | 931 (0 << FORCE_SUMMARIZE_shift) | 932 (4 << DEPTH_PENDING_FREE_shift) | 933 (16 << DEPTH_CACHELINE_FREE_shift) | 934 0)); 935 } else { 936 EREG(TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) | 937 (28 << TD_FIFO_CREDIT_shift))); 938 EREG(VC_ENHANCE, 0); 939 EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit); 940 EREG(DB_DEBUG, 0); 941 EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 942 (16 << DEPTH_FLUSH_shift) | 943 (0 << FORCE_SUMMARIZE_shift) | 944 (4 << DEPTH_PENDING_FREE_shift) | 945 (4 << DEPTH_CACHELINE_FREE_shift) | 946 0)); 947 } 948 949 PACK0(SQ_VTX_BASE_VTX_LOC, 2); 950 E32(0); 951 E32(0); 952 953 PACK0(SQ_ESGS_RING_ITEMSIZE, 9); 954 E32(0); // SQ_ESGS_RING_ITEMSIZE 955 E32(0); // SQ_GSVS_RING_ITEMSIZE 956 E32(0); // SQ_ESTMP_RING_ITEMSIZE 957 E32(0); // SQ_GSTMP_RING_ITEMSIZE 958 E32(0); // SQ_VSTMP_RING_ITEMSIZE 959 E32(0); // SQ_PSTMP_RING_ITEMSIZE 960 E32(0); // SQ_FBUF_RING_ITEMSIZE 961 E32(0); // 
SQ_REDUC_RING_ITEMSIZE 962 E32(0); // SQ_GS_VERT_ITEMSIZE 963 964 // DB 965 EREG(DB_DEPTH_CONTROL, 0); 966 PACK0(DB_RENDER_CONTROL, 2); 967 E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); 968 if (info->ChipFamily < CHIP_FAMILY_RV770) 969 E32(FORCE_SHADER_Z_ORDER_bit); 970 else 971 E32(0); 972 EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | 973 (2 << ALPHA_TO_MASK_OFFSET1_shift) | 974 (2 << ALPHA_TO_MASK_OFFSET2_shift) | 975 (2 << ALPHA_TO_MASK_OFFSET3_shift))); 976 EREG(DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ 977 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 978 979 PACK0(DB_STENCIL_CLEAR, 2); 980 E32(0); // DB_STENCIL_CLEAR 981 E32(0); // DB_DEPTH_CLEAR 982 983 PACK0(DB_STENCILREFMASK, 3); 984 E32(0); // DB_STENCILREFMASK 985 E32(0); // DB_STENCILREFMASK_BF 986 E32(0); // SX_ALPHA_REF 987 988 PACK0(CB_CLRCMP_CONTROL, 4); 989 E32(1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC 990 E32(0); // CB_CLRCMP_SRC 991 E32(0); // CB_CLRCMP_DST 992 E32(0); // CB_CLRCMP_MSK 993 994 EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask); 995 EREG(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); 996 997 PACK0(SX_ALPHA_TEST_CONTROL, 5); 998 E32(0); // SX_ALPHA_TEST_CONTROL 999 E32(0x00000000); // CB_BLEND_RED 1000 E32(0x00000000); // CB_BLEND_GREEN 1001 E32(0x00000000); // CB_BLEND_BLUE 1002 E32(0x00000000); // CB_BLEND_ALPHA 1003 1004 EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | 1005 (0 << WINDOW_Y_OFFSET_shift))); 1006 1007 if (info->ChipFamily < CHIP_FAMILY_RV770) 1008 EREG(R7xx_PA_SC_EDGERULE, 0x00000000); 1009 else 1010 EREG(R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); 1011 1012 EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); 1013 1014 END_BATCH(); 1015 1016 /* clip boolean is set to always visible -> doesn't matter */ 1017 for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) 1018 r600_set_clip_rect(pScrn, i, 0, 0, 8192, 8192); 1019 1020 for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) 1021 
r600_set_vport_scissor(pScrn, i, 0, 0, 8192, 8192); 1022 1023 BEGIN_BATCH(49); 1024 PACK0(PA_SC_MPASS_PS_CNTL, 2); 1025 E32(0); 1026 if (info->ChipFamily < CHIP_FAMILY_RV770) 1027 E32((WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); 1028 else 1029 E32((FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | 1030 0x00500000)); /* ? */ 1031 1032 PACK0(PA_SC_LINE_CNTL, 9); 1033 E32(0); // PA_SC_LINE_CNTL 1034 E32(0); // PA_SC_AA_CONFIG 1035 E32(((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL 1036 (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */ 1037 EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ 1038 EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ 1039 EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ 1040 EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ 1041 E32(0); // PA_SC_AA_SAMPLE_LOCS_MCTX 1042 E32(0); // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M 1043 1044 EREG(PA_SC_AA_MASK, 0xFFFFFFFF); 1045 1046 PACK0(PA_CL_CLIP_CNTL, 5); 1047 E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL 1048 E32(FACE_bit); // PA_SU_SC_MODE_CNTL 1049 E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL 1050 E32(0); // PA_CL_VS_OUT_CNTL 1051 E32(0); // PA_CL_NANINF_CNTL 1052 1053 PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); 1054 E32(0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL 1055 E32(0); // PA_SU_POLY_OFFSET_CLAMP 1056 E32(0); // PA_SU_POLY_OFFSET_FRONT_SCALE 1057 E32(0); // PA_SU_POLY_OFFSET_FRONT_OFFSET 1058 E32(0); // PA_SU_POLY_OFFSET_BACK_SCALE 1059 E32(0); // PA_SU_POLY_OFFSET_BACK_OFFSET 1060 1061 // SPI 1062 if (info->ChipFamily < CHIP_FAMILY_RV770) 1063 EREG(R7xx_SPI_THREAD_GROUPING, 0); 1064 else 1065 EREG(R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift)); 1066 1067 /* default Interpolator setup */ 1068 EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | 1069 (1 << SEMANTIC_1_shift))); 1070 PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2); 1071 /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ 1072 E32(((0 << SEMANTIC_shift) | 1073 (0x01 << DEFAULT_VAL_shift) | 1074 SEL_CENTROID_bit)); 1075 /* 
 SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
    E32(((1 << SEMANTIC_shift) |
	 (0x01 << DEFAULT_VAL_shift) |
	 SEL_CENTROID_bit));

    /* fog/Z interpolation inputs disabled */
    PACK0(SPI_INPUT_Z, 4);
    E32(0); // SPI_INPUT_Z
    E32(0); // SPI_FOG_CNTL
    E32(0); // SPI_FOG_FUNC_SCALE
    E32(0); // SPI_FOG_FUNC_BIAS

    END_BATCH();

    // clear FS: point the fetch shader at the shader buffer object
    fs_conf.bo = accel_state->shaders_bo;
    r600_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);

    // VGT (vertex grouper/tessellator) defaults
    BEGIN_BATCH(46);
    PACK0(VGT_MAX_VTX_INDX, 4);
    E32(0xffffff); // VGT_MAX_VTX_INDX
    E32(0);        // VGT_MIN_VTX_INDX
    E32(0);        // VGT_INDX_OFFSET
    E32(0);        // VGT_MULTI_PRIM_IB_RESET_INDX

    EREG(VGT_PRIMITIVEID_EN, 0);
    EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0);

    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
    E32(0); // VGT_INSTANCE_STEP_RATE_0
    E32(0); // VGT_INSTANCE_STEP_RATE_1

    /* point/line state plus the VGT grouping/HOS block — all defaults
     * except line width */
    PACK0(PA_SU_POINT_SIZE, 17);
    E32(0); // PA_SU_POINT_SIZE
    E32(0); // PA_SU_POINT_MINMAX
    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
    E32(0); // PA_SC_LINE_STIPPLE
    E32(0); // VGT_OUTPUT_PATH_CNTL
    E32(0); // VGT_HOS_CNTL
    E32(0); // VGT_HOS_MAX_TESS_LEVEL
    E32(0); // VGT_HOS_MIN_TESS_LEVEL
    E32(0); // VGT_HOS_REUSE_DEPTH
    E32(0); // VGT_GROUP_PRIM_TYPE
    E32(0); // VGT_GROUP_FIRST_DECR
    E32(0); // VGT_GROUP_DECR
    E32(0); // VGT_GROUP_VECT_0_CNTL
    E32(0); // VGT_GROUP_VECT_1_CNTL
    E32(0); // VGT_GROUP_VECT_0_FMT_CNTL
    E32(0); // VGT_GROUP_VECT_1_FMT_CNTL
    E32(0); // VGT_GS_MODE

    /* stream-out disabled */
    PACK0(VGT_STRMOUT_EN, 3);
    E32(0); // VGT_STRMOUT_EN
    E32(0); // VGT_REUSE_OFF
    E32(0); // VGT_VTX_CNT_EN

    EREG(VGT_STRMOUT_BUFFER_EN, 0);
    EREG(SX_MISC, 0);
    END_BATCH();
}


/*
 * Commands
 */

/*
 * Emit a DRAW_INDEX_IMMD packet: the indices are embedded directly in the
 * command stream rather than fetched from an index buffer.  16-bit indices
 * are packed two per dword (an odd trailing index occupies the low half of
 * the final dword); 32-bit indices take one dword each.
 */
void
r600_draw_immd(ScrnInfoPtr pScrn, draw_config_t *draw_conf, uint32_t *indices)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t i, count;

    /* calculate the DRAW_INDEX_IMMD payload size in dwords:
     * 2 fixed dwords (num_indices + draw initiator) plus the packed indices */
    count = 2;
    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
	count += (draw_conf->num_indices + 1) / 2;
    else
	count += draw_conf->num_indices;

    /* 8 dwords for the fixed prologue packets below, plus the draw payload */
    BEGIN_BATCH(8 + count);
    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
    PACK3(IT_INDEX_TYPE, 1);
#if X_BYTE_ORDER == X_BIG_ENDIAN
    /* tell the CP to byte-swap the embedded index data on big-endian hosts */
    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
#else
    E32(draw_conf->index_type);
#endif
    PACK3(IT_NUM_INSTANCES, 1);
    E32(draw_conf->num_instances);

    PACK3(IT_DRAW_INDEX_IMMD, count);
    E32(draw_conf->num_indices);
    E32(draw_conf->vgt_draw_initiator);

    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
	for (i = 0; i < draw_conf->num_indices; i += 2) {
	    if ((i + 1) == draw_conf->num_indices)
		E32(indices[i]); /* odd count: last index goes in alone */
	    else
		E32((indices[i] | (indices[i + 1] << 16)));
	}
    } else {
	for (i = 0; i < draw_conf->num_indices; i++)
	    E32(indices[i]);
    }
    END_BATCH();
}

/*
 * Emit a DRAW_INDEX_AUTO packet: the VGT generates indices itself
 * (0..num_indices-1), so vertices are fetched sequentially from the bound
 * vertex resource with no index data in the stream.
 */
void
r600_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(10);
    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
    PACK3(IT_INDEX_TYPE, 1);
#if X_BYTE_ORDER == X_BIG_ENDIAN
    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
#else
    E32(draw_conf->index_type);
#endif
    PACK3(IT_NUM_INSTANCES, 1);
    E32(draw_conf->num_instances);
    PACK3(IT_DRAW_INDEX_AUTO, 2);
    E32(draw_conf->num_indices);
    E32(draw_conf->vgt_draw_initiator);
    END_BATCH();
}

/*
 * Finish the current accelerated operation: bind the vertices accumulated
 * in the vertex buffer since vb_start_op as a vertex resource, draw them as
 * a RECTLIST with an auto-indexed draw, then wait for 3D idle and flush the
 * destination color cache.
 *
 * vtx_size is the per-vertex size in bytes.  vb_start_op == -1 means no
 * operation is pending (it is reset to -1 at the end of this function).
 */
void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    draw_config_t draw_conf;
    vtx_resource_t vtx_res;

    if (accel_state->vbo.vb_start_op == -1)
	return;

    CLEAR (draw_conf);
    CLEAR (vtx_res);

    /* no vertices were emitted for this op — drop the indirect buffer */
    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
	R600IBDiscard(pScrn);
	return;
    }

    /* Vertex buffer setup */
    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
    vtx_res.id = SQ_VTX_RESOURCE_vs;
    vtx_res.vtx_size_dw = vtx_size / 4;
    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; /* size in dwords */
    vtx_res.mem_req_size = 1;
    vtx_res.vb_addr = accel_state->vbo.vb_start_op;
    vtx_res.bo = accel_state->vbo.vb_bo;
#if X_BYTE_ORDER == X_BIG_ENDIAN
    vtx_res.endian = SQ_ENDIAN_8IN32;
#endif
    r600_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);

    /* Draw */
    draw_conf.prim_type = DI_PT_RECTLIST;
    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
    draw_conf.num_instances = 1;
    /* dword count / dwords-per-vertex = vertex count */
    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
    /* index_type is still programmed via IT_INDEX_TYPE in r600_draw_auto,
     * although no index data is fetched for an auto-indexed draw */
    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;

    r600_draw_auto(pScrn, &draw_conf);

    /* XXX drm should handle this in fence submit */
    r600_wait_3d_idle_clean(pScrn);

    /* sync dst surface */
    r600_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
			     accel_state->dst_size, 0,
			     accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);

    accel_state->vbo.vb_start_op = -1;
    accel_state->ib_reset_op = 0;

}
