r6xx_accel.c revision 70cce690
1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Authors: Alex Deucher <alexander.deucher@amd.com> 24 * Matthias Hopf <mhopf@suse.de> 25 */ 26#ifdef HAVE_CONFIG_H 27#include "config.h" 28#endif 29 30#include "xf86.h" 31 32#include <errno.h> 33 34#include "radeon.h" 35#include "r600_shader.h" 36#include "radeon_reg.h" 37#include "r600_reg.h" 38#include "r600_state.h" 39 40#include "radeon_drm.h" 41#include "radeon_vbo.h" 42#include "radeon_exa_shared.h" 43 44/* we try and batch operations together under KMS - 45 but it doesn't work yet without misrendering */ 46#define KMS_MULTI_OP 1 47 48/* Flush the indirect buffer to the kernel for submission to the card */ 49void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) 50{ 51 RADEONInfoPtr info = RADEONPTR(pScrn); 52 drmBufPtr buffer = ib; 53 int start = 0; 54 drm_radeon_indirect_t indirect; 55 56#if defined(XF86DRM_MODE) 57 if (info->cs) { 58 radeon_cs_flush_indirect(pScrn); 59 return; 60 } 61#endif 62 63 if (!buffer) return; 64 65 //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", 66 // buffer->idx); 67 68 while (buffer->used & 0x3c){ 69 BEGIN_BATCH(1); 70 E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */ 71 END_BATCH(); 72 } 73 74 info->accel_state->vbo.vb_offset = 0; 75 info->accel_state->vbo.vb_start_op = -1; 76 77 //ErrorF("buffer bytes: %d\n", buffer->used); 78 79 indirect.idx = buffer->idx; 80 indirect.start = start; 81 indirect.end = buffer->used; 82 indirect.discard = 1; 83 84 drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, 85 &indirect, sizeof(drm_radeon_indirect_t)); 86 87} 88 89void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) 90{ 91#if defined(XF86DRM_MODE) 92 RADEONInfoPtr info = RADEONPTR(pScrn); 93 if (info->cs) { 94 radeon_ib_discard(pScrn); 95 } 96#endif 97 if (!ib) return; 98 99 ib->used = 0; 100 R600CPFlushIndirect(pScrn, ib); 101} 102 103void 104r600_wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) 105{ 106 RADEONInfoPtr info = RADEONPTR(pScrn); 107 108 //flush caches, don't generate 
timestamp 109 BEGIN_BATCH(5); 110 PACK3(ib, IT_EVENT_WRITE, 1); 111 E32(ib, CACHE_FLUSH_AND_INV_EVENT); 112 // wait for 3D idle clean 113 EREG(ib, WAIT_UNTIL, (WAIT_3D_IDLE_bit | 114 WAIT_3D_IDLECLEAN_bit)); 115 END_BATCH(); 116} 117 118void 119r600_wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) 120{ 121 RADEONInfoPtr info = RADEONPTR(pScrn); 122 123 BEGIN_BATCH(3); 124 EREG(ib, WAIT_UNTIL, WAIT_3D_IDLE_bit); 125 END_BATCH(); 126} 127 128void 129r600_start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) 130{ 131 RADEONInfoPtr info = RADEONPTR(pScrn); 132 133 if (info->ChipFamily < CHIP_FAMILY_RV770) { 134 BEGIN_BATCH(5); 135 PACK3(ib, IT_START_3D_CMDBUF, 1); 136 E32(ib, 0); 137 } else 138 BEGIN_BATCH(3); 139 140 PACK3(ib, IT_CONTEXT_CONTROL, 2); 141 E32(ib, 0x80000000); 142 E32(ib, 0x80000000); 143 END_BATCH(); 144 145} 146 147/* 148 * Setup of functional groups 149 */ 150 151// asic stack/thread/gpr limits - need to query the drm 152static void 153r600_sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) 154{ 155 uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 156 uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; 157 RADEONInfoPtr info = RADEONPTR(pScrn); 158 159 if ((info->ChipFamily == CHIP_FAMILY_RV610) || 160 (info->ChipFamily == CHIP_FAMILY_RV620) || 161 (info->ChipFamily == CHIP_FAMILY_RS780) || 162 (info->ChipFamily == CHIP_FAMILY_RS880) || 163 (info->ChipFamily == CHIP_FAMILY_RV710)) 164 sq_config = 0; // no VC 165 else 166 sq_config = VC_ENABLE_bit; 167 168 sq_config |= (DX9_CONSTS_bit | 169 ALU_INST_PREFER_VECTOR_bit | 170 (sq_conf->ps_prio << PS_PRIO_shift) | 171 (sq_conf->vs_prio << VS_PRIO_shift) | 172 (sq_conf->gs_prio << GS_PRIO_shift) | 173 (sq_conf->es_prio << ES_PRIO_shift)); 174 175 sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 176 (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 177 (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 178 sq_gpr_resource_mgmt_2 = 
((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 179 (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 180 181 sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 182 (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 183 (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 184 (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 185 186 sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 187 (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 188 189 sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 190 (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 191 192 BEGIN_BATCH(8); 193 PACK0(ib, SQ_CONFIG, 6); 194 E32(ib, sq_config); 195 E32(ib, sq_gpr_resource_mgmt_1); 196 E32(ib, sq_gpr_resource_mgmt_2); 197 E32(ib, sq_thread_resource_mgmt); 198 E32(ib, sq_stack_resource_mgmt_1); 199 E32(ib, sq_stack_resource_mgmt_2); 200 END_BATCH(); 201} 202 203void 204r600_set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain) 205{ 206 uint32_t cb_color_info; 207 int pitch, slice, h; 208 RADEONInfoPtr info = RADEONPTR(pScrn); 209 210 cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 211 (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 212 (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 213 (cb_conf->number_type << NUMBER_TYPE_shift) | 214 (cb_conf->comp_swap << COMP_SWAP_shift) | 215 (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); 216 if (cb_conf->read_size) 217 cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; 218 if (cb_conf->blend_clamp) 219 cb_color_info |= BLEND_CLAMP_bit; 220 if (cb_conf->clear_color) 221 cb_color_info |= CLEAR_COLOR_bit; 222 if (cb_conf->blend_bypass) 223 cb_color_info |= BLEND_BYPASS_bit; 224 if (cb_conf->blend_float32) 225 cb_color_info |= BLEND_FLOAT32_bit; 226 if (cb_conf->simple_float) 227 cb_color_info |= SIMPLE_FLOAT_bit; 228 if (cb_conf->round_mode) 229 
cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 230 if (cb_conf->tile_compact) 231 cb_color_info |= TILE_COMPACT_bit; 232 if (cb_conf->source_format) 233 cb_color_info |= SOURCE_FORMAT_bit; 234 235 pitch = (cb_conf->w / 8) - 1; 236 h = RADEON_ALIGN(cb_conf->h, 8); 237 slice = ((cb_conf->w * h) / 64) - 1; 238 239 BEGIN_BATCH(3 + 2); 240 EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); 241 RELOC_BATCH(cb_conf->bo, 0, domain); 242 END_BATCH(); 243 244 // rv6xx workaround 245 if ((info->ChipFamily > CHIP_FAMILY_R600) && 246 (info->ChipFamily < CHIP_FAMILY_RV770)) { 247 BEGIN_BATCH(2); 248 PACK3(ib, IT_SURFACE_BASE_UPDATE, 1); 249 E32(ib, (2 << cb_conf->id)); 250 END_BATCH(); 251 } 252 /* Set CMASK & TILE buffer to the offset of color buffer as 253 * we don't use those this shouldn't cause any issue and we 254 * then have a valid cmd stream 255 */ 256 BEGIN_BATCH(3 + 2); 257 EREG(ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 258 RELOC_BATCH(cb_conf->bo, 0, domain); 259 END_BATCH(); 260 BEGIN_BATCH(3 + 2); 261 EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 262 RELOC_BATCH(cb_conf->bo, 0, domain); 263 END_BATCH(); 264 BEGIN_BATCH(9); 265 // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib 266 EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | 267 (slice << SLICE_TILE_MAX_shift))); 268 EREG(ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | 269 (0 << SLICE_MAX_shift))); 270 EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | 271 (0 << FMASK_TILE_MAX_shift))); 272 END_BATCH(); 273 274 BEGIN_BATCH(3 + 2); 275 EREG(ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); 276 RELOC_BATCH(cb_conf->bo, 0, domain); 277 END_BATCH(); 278 279} 280 281static void 282r600_cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, 283 uint32_t size, uint64_t mc_addr, 284 
struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) 285{ 286 RADEONInfoPtr info = RADEONPTR(pScrn); 287 uint32_t cp_coher_size; 288 if (size == 0xffffffff) 289 cp_coher_size = 0xffffffff; 290 else 291 cp_coher_size = ((size + 255) >> 8); 292 293 BEGIN_BATCH(5 + 2); 294 PACK3(ib, IT_SURFACE_SYNC, 4); 295 E32(ib, sync_type); 296 E32(ib, cp_coher_size); 297 E32(ib, (mc_addr >> 8)); 298 E32(ib, 10); /* poll interval */ 299 RELOC_BATCH(bo, rdomains, wdomain); 300 END_BATCH(); 301} 302 303/* inserts a wait for vline in the command stream */ 304void 305r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, 306 xf86CrtcPtr crtc, int start, int stop) 307{ 308 RADEONInfoPtr info = RADEONPTR(pScrn); 309 uint32_t offset; 310 311 if (!crtc) 312 return; 313 314 if (stop < start) 315 return; 316 317 if (!crtc->enabled) 318 return; 319 320 if (info->cs) { 321 if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) 322 return; 323 } else { 324#ifdef USE_EXA 325 if (info->useEXA) 326 offset = exaGetPixmapOffset(pPix); 327 else 328#endif 329 offset = pPix->devPrivate.ptr - info->FB; 330 331 /* if drawing to front buffer */ 332 if (offset != 0) 333 return; 334 } 335 336 start = max(start, 0); 337 stop = min(stop, crtc->mode.VDisplay); 338 339 if (start > crtc->mode.VDisplay) 340 return; 341 342#if defined(XF86DRM_MODE) 343 if (info->cs) { 344 drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; 345 346 BEGIN_BATCH(11); 347 /* set the VLINE range */ 348 EREG(ib, AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */ 349 (start << AVIVO_D1MODE_VLINE_START_SHIFT) | 350 (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); 351 352 /* tell the CP to poll the VLINE state register */ 353 PACK3(ib, IT_WAIT_REG_MEM, 6); 354 E32(ib, IT_WAIT_REG | IT_WAIT_EQ); 355 E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS)); 356 E32(ib, 0); 357 E32(ib, 0); // Ref value 358 E32(ib, AVIVO_D1MODE_VLINE_STAT); // Mask 359 E32(ib, 10); // Wait interval 360 /* add crtc reloc */ 361 
PACK3(ib, IT_NOP, 1); 362 E32(ib, drmmode_crtc->mode_crtc->crtc_id); 363 END_BATCH(); 364 } else 365#endif 366 { 367 RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; 368 369 BEGIN_BATCH(9); 370 /* set the VLINE range */ 371 EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset, 372 (start << AVIVO_D1MODE_VLINE_START_SHIFT) | 373 (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); 374 375 /* tell the CP to poll the VLINE state register */ 376 PACK3(ib, IT_WAIT_REG_MEM, 6); 377 E32(ib, IT_WAIT_REG | IT_WAIT_EQ); 378 E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset)); 379 E32(ib, 0); 380 E32(ib, 0); // Ref value 381 E32(ib, AVIVO_D1MODE_VLINE_STAT); // Mask 382 E32(ib, 10); // Wait interval 383 END_BATCH(); 384 } 385} 386 387void 388r600_fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain) 389{ 390 RADEONInfoPtr info = RADEONPTR(pScrn); 391 uint32_t sq_pgm_resources; 392 393 sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 394 (fs_conf->stack_size << STACK_SIZE_shift)); 395 396 if (fs_conf->dx10_clamp) 397 sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; 398 399 BEGIN_BATCH(3 + 2); 400 EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 401 RELOC_BATCH(fs_conf->bo, domain, 0); 402 END_BATCH(); 403 404 BEGIN_BATCH(6); 405 EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources); 406 EREG(ib, SQ_PGM_CF_OFFSET_FS, 0); 407 END_BATCH(); 408} 409 410void 411r600_vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain) 412{ 413 RADEONInfoPtr info = RADEONPTR(pScrn); 414 uint32_t sq_pgm_resources; 415 416 sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 417 (vs_conf->stack_size << STACK_SIZE_shift)); 418 419 if (vs_conf->dx10_clamp) 420 sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; 421 if (vs_conf->fetch_cache_lines) 422 sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 423 if (vs_conf->uncached_first_inst) 424 
sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 425 426 /* flush SQ cache */ 427 r600_cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit, 428 vs_conf->shader_size, vs_conf->shader_addr, 429 vs_conf->bo, domain, 0); 430 431 BEGIN_BATCH(3 + 2); 432 EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 433 RELOC_BATCH(vs_conf->bo, domain, 0); 434 END_BATCH(); 435 436 BEGIN_BATCH(6); 437 EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources); 438 EREG(ib, SQ_PGM_CF_OFFSET_VS, 0); 439 END_BATCH(); 440} 441 442void 443r600_ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain) 444{ 445 RADEONInfoPtr info = RADEONPTR(pScrn); 446 uint32_t sq_pgm_resources; 447 448 sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 449 (ps_conf->stack_size << STACK_SIZE_shift)); 450 451 if (ps_conf->dx10_clamp) 452 sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; 453 if (ps_conf->fetch_cache_lines) 454 sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 455 if (ps_conf->uncached_first_inst) 456 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 457 if (ps_conf->clamp_consts) 458 sq_pgm_resources |= CLAMP_CONSTS_bit; 459 460 /* flush SQ cache */ 461 r600_cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit, 462 ps_conf->shader_size, ps_conf->shader_addr, 463 ps_conf->bo, domain, 0); 464 465 BEGIN_BATCH(3 + 2); 466 EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 467 RELOC_BATCH(ps_conf->bo, domain, 0); 468 END_BATCH(); 469 470 BEGIN_BATCH(9); 471 EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources); 472 EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode); 473 EREG(ib, SQ_PGM_CF_OFFSET_PS, 0); 474 END_BATCH(); 475} 476 477void 478r600_set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf) 479{ 480 RADEONInfoPtr info = RADEONPTR(pScrn); 481 int i; 482 const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); 483 484 BEGIN_BATCH(2 + countreg); 485 PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, 
countreg); 486 for (i = 0; i < countreg; i++) 487 EFLOAT(ib, const_buf[i]); 488 END_BATCH(); 489} 490 491void 492r600_set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) 493{ 494 RADEONInfoPtr info = RADEONPTR(pScrn); 495 /* bool register order is: ps, vs, gs; one register each 496 * 1 bits per bool; 32 bools each for ps, vs, gs. 497 */ 498 BEGIN_BATCH(3); 499 EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 500 END_BATCH(); 501} 502 503static void 504r600_set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain) 505{ 506 RADEONInfoPtr info = RADEONPTR(pScrn); 507 struct radeon_accel_state *accel_state = info->accel_state; 508 uint32_t sq_vtx_constant_word2; 509 510 sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 511 ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 512 (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 513 (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 514 (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 515 if (res->clamp_x) 516 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 517 518 if (res->format_comp_all) 519 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 520 521 if (res->srf_mode_all) 522 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 523 524 /* flush vertex cache */ 525 if ((info->ChipFamily == CHIP_FAMILY_RV610) || 526 (info->ChipFamily == CHIP_FAMILY_RV620) || 527 (info->ChipFamily == CHIP_FAMILY_RS780) || 528 (info->ChipFamily == CHIP_FAMILY_RS880) || 529 (info->ChipFamily == CHIP_FAMILY_RV710)) 530 r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit, 531 accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr, 532 res->bo, 533 domain, 0); 534 else 535 r600_cp_set_surface_sync(pScrn, ib, VC_ACTION_ENA_bit, 536 accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr, 537 res->bo, 538 domain, 0); 539 540 BEGIN_BATCH(9 + 
2); 541 PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); 542 E32(ib, res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 543 E32(ib, (res->vtx_num_entries << 2) - 1); // 1: SIZE 544 E32(ib, sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 545 E32(ib, res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? 546 E32(ib, 0); // 4: n/a 547 E32(ib, 0); // 5: n/a 548 E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE 549 RELOC_BATCH(res->bo, domain, 0); 550 END_BATCH(); 551} 552 553void 554r600_set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain) 555{ 556 RADEONInfoPtr info = RADEONPTR(pScrn); 557 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 558 uint32_t sq_tex_resource_word5, sq_tex_resource_word6; 559 560 sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | 561 (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); 562 563 if (tex_res->w) 564 sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) | 565 ((tex_res->w - 1) << TEX_WIDTH_shift)); 566 567 if (tex_res->tile_type) 568 sq_tex_resource_word0 |= TILE_TYPE_bit; 569 570 sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); 571 572 if (tex_res->h) 573 sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 574 if (tex_res->depth) 575 sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 576 577 sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 578 (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 579 (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 580 (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 581 (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 582 (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 583 (tex_res->request_size << REQUEST_SIZE_shift) | 584 (tex_res->dst_sel_x << 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 585 (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 586 (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 587 (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 588 (tex_res->base_level << BASE_LEVEL_shift)); 589 590 if (tex_res->srf_mode_all) 591 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 592 if (tex_res->force_degamma) 593 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 594 595 sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 596 (tex_res->base_array << BASE_ARRAY_shift) | 597 (tex_res->last_array << LAST_ARRAY_shift)); 598 599 sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | 600 (tex_res->perf_modulation << PERF_MODULATION_shift) | 601 (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); 602 603 if (tex_res->interlaced) 604 sq_tex_resource_word6 |= INTERLACED_bit; 605 606 /* flush texture cache */ 607 r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit, 608 tex_res->size, tex_res->base, 609 tex_res->bo, domain, 0); 610 611 BEGIN_BATCH(9 + 4); 612 PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); 613 E32(ib, sq_tex_resource_word0); 614 E32(ib, sq_tex_resource_word1); 615 E32(ib, ((tex_res->base) >> 8)); 616 E32(ib, ((tex_res->mip_base) >> 8)); 617 E32(ib, sq_tex_resource_word4); 618 E32(ib, sq_tex_resource_word5); 619 E32(ib, sq_tex_resource_word6); 620 RELOC_BATCH(tex_res->bo, domain, 0); 621 RELOC_BATCH(tex_res->mip_bo, domain, 0); 622 END_BATCH(); 623} 624 625void 626r600_set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) 627{ 628 RADEONInfoPtr info = RADEONPTR(pScrn); 629 uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 630 631 sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 632 (s->clamp_y << CLAMP_Y_shift) | 633 (s->clamp_z << CLAMP_Z_shift) | 634 (s->xy_mag_filter << 
XY_MAG_FILTER_shift) | 635 (s->xy_min_filter << XY_MIN_FILTER_shift) | 636 (s->z_filter << Z_FILTER_shift) | 637 (s->mip_filter << MIP_FILTER_shift) | 638 (s->border_color << BORDER_COLOR_TYPE_shift) | 639 (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 640 (s->chroma_key << CHROMA_KEY_shift)); 641 if (s->point_sampling_clamp) 642 sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; 643 if (s->tex_array_override) 644 sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; 645 if (s->lod_uses_minor_axis) 646 sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; 647 648 sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | 649 (s->max_lod << MAX_LOD_shift) | 650 (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); 651 652 sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | 653 (s->perf_mip << PERF_MIP_shift) | 654 (s->perf_z << PERF_Z_shift)); 655 if (s->mc_coord_truncate) 656 sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 657 if (s->force_degamma) 658 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 659 if (s->high_precision_filter) 660 sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; 661 if (s->fetch_4) 662 sq_tex_sampler_word2 |= FETCH_4_bit; 663 if (s->sample_is_pcf) 664 sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; 665 if (s->type) 666 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 667 668 BEGIN_BATCH(5); 669 PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); 670 E32(ib, sq_tex_sampler_word0); 671 E32(ib, sq_tex_sampler_word1); 672 E32(ib, sq_tex_sampler_word2); 673 END_BATCH(); 674} 675 676//XXX deal with clip offsets in clip setup 677void 678r600_set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) 679{ 680 RADEONInfoPtr info = RADEONPTR(pScrn); 681 682 BEGIN_BATCH(4); 683 PACK0(ib, PA_SC_SCREEN_SCISSOR_TL, 2); 684 E32(ib, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 685 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 686 E32(ib, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) 
| 687 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 688 END_BATCH(); 689} 690 691void 692r600_set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) 693{ 694 RADEONInfoPtr info = RADEONPTR(pScrn); 695 696 BEGIN_BATCH(4); 697 PACK0(ib, PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); 698 E32(ib, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 699 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 700 WINDOW_OFFSET_DISABLE_bit)); 701 E32(ib, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 702 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 703 END_BATCH(); 704} 705 706void 707r600_set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) 708{ 709 RADEONInfoPtr info = RADEONPTR(pScrn); 710 711 BEGIN_BATCH(4); 712 PACK0(ib, PA_SC_GENERIC_SCISSOR_TL, 2); 713 E32(ib, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 714 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 715 WINDOW_OFFSET_DISABLE_bit)); 716 E32(ib, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 717 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 718 END_BATCH(); 719} 720 721void 722r600_set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) 723{ 724 RADEONInfoPtr info = RADEONPTR(pScrn); 725 726 BEGIN_BATCH(4); 727 PACK0(ib, PA_SC_WINDOW_SCISSOR_TL, 2); 728 E32(ib, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 729 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 730 WINDOW_OFFSET_DISABLE_bit)); 731 E32(ib, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 732 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 733 END_BATCH(); 734} 735 736void 737r600_set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) 738{ 739 RADEONInfoPtr info = RADEONPTR(pScrn); 740 741 BEGIN_BATCH(4); 742 PACK0(ib, PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2); 743 E32(ib, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 744 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 745 E32(ib, ((x2 << 
PA_SC_CLIPRECT_0_BR__BR_X_shift) | 746 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 747 END_BATCH(); 748} 749 750/* 751 * Setup of default state 752 */ 753 754void 755r600_set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) 756{ 757 tex_resource_t tex_res; 758 shader_config_t fs_conf; 759 sq_config_t sq_conf; 760 int i; 761 RADEONInfoPtr info = RADEONPTR(pScrn); 762 struct radeon_accel_state *accel_state = info->accel_state; 763 764 if (accel_state->XInited3D) 765 return; 766 767 memset(&tex_res, 0, sizeof(tex_resource_t)); 768 memset(&fs_conf, 0, sizeof(shader_config_t)); 769 770 accel_state->XInited3D = TRUE; 771 772 r600_start_3d(pScrn, accel_state->ib); 773 774 // SQ 775 sq_conf.ps_prio = 0; 776 sq_conf.vs_prio = 1; 777 sq_conf.gs_prio = 2; 778 sq_conf.es_prio = 3; 779 // need to set stack/thread/gpr limits based on the asic 780 // for now just set them low enough so any card will work 781 // see r600_cp.c in the drm 782 switch (info->ChipFamily) { 783 case CHIP_FAMILY_R600: 784 sq_conf.num_ps_gprs = 192; 785 sq_conf.num_vs_gprs = 56; 786 sq_conf.num_temp_gprs = 4; 787 sq_conf.num_gs_gprs = 0; 788 sq_conf.num_es_gprs = 0; 789 sq_conf.num_ps_threads = 136; 790 sq_conf.num_vs_threads = 48; 791 sq_conf.num_gs_threads = 4; 792 sq_conf.num_es_threads = 4; 793 sq_conf.num_ps_stack_entries = 128; 794 sq_conf.num_vs_stack_entries = 128; 795 sq_conf.num_gs_stack_entries = 0; 796 sq_conf.num_es_stack_entries = 0; 797 break; 798 case CHIP_FAMILY_RV630: 799 case CHIP_FAMILY_RV635: 800 sq_conf.num_ps_gprs = 84; 801 sq_conf.num_vs_gprs = 36; 802 sq_conf.num_temp_gprs = 4; 803 sq_conf.num_gs_gprs = 0; 804 sq_conf.num_es_gprs = 0; 805 sq_conf.num_ps_threads = 144; 806 sq_conf.num_vs_threads = 40; 807 sq_conf.num_gs_threads = 4; 808 sq_conf.num_es_threads = 4; 809 sq_conf.num_ps_stack_entries = 40; 810 sq_conf.num_vs_stack_entries = 40; 811 sq_conf.num_gs_stack_entries = 32; 812 sq_conf.num_es_stack_entries = 16; 813 break; 814 case CHIP_FAMILY_RV610: 815 case CHIP_FAMILY_RV620: 
816 case CHIP_FAMILY_RS780: 817 case CHIP_FAMILY_RS880: 818 default: 819 sq_conf.num_ps_gprs = 84; 820 sq_conf.num_vs_gprs = 36; 821 sq_conf.num_temp_gprs = 4; 822 sq_conf.num_gs_gprs = 0; 823 sq_conf.num_es_gprs = 0; 824 sq_conf.num_ps_threads = 136; 825 sq_conf.num_vs_threads = 48; 826 sq_conf.num_gs_threads = 4; 827 sq_conf.num_es_threads = 4; 828 sq_conf.num_ps_stack_entries = 40; 829 sq_conf.num_vs_stack_entries = 40; 830 sq_conf.num_gs_stack_entries = 32; 831 sq_conf.num_es_stack_entries = 16; 832 break; 833 case CHIP_FAMILY_RV670: 834 sq_conf.num_ps_gprs = 144; 835 sq_conf.num_vs_gprs = 40; 836 sq_conf.num_temp_gprs = 4; 837 sq_conf.num_gs_gprs = 0; 838 sq_conf.num_es_gprs = 0; 839 sq_conf.num_ps_threads = 136; 840 sq_conf.num_vs_threads = 48; 841 sq_conf.num_gs_threads = 4; 842 sq_conf.num_es_threads = 4; 843 sq_conf.num_ps_stack_entries = 40; 844 sq_conf.num_vs_stack_entries = 40; 845 sq_conf.num_gs_stack_entries = 32; 846 sq_conf.num_es_stack_entries = 16; 847 break; 848 case CHIP_FAMILY_RV770: 849 sq_conf.num_ps_gprs = 192; 850 sq_conf.num_vs_gprs = 56; 851 sq_conf.num_temp_gprs = 4; 852 sq_conf.num_gs_gprs = 0; 853 sq_conf.num_es_gprs = 0; 854 sq_conf.num_ps_threads = 188; 855 sq_conf.num_vs_threads = 60; 856 sq_conf.num_gs_threads = 0; 857 sq_conf.num_es_threads = 0; 858 sq_conf.num_ps_stack_entries = 256; 859 sq_conf.num_vs_stack_entries = 256; 860 sq_conf.num_gs_stack_entries = 0; 861 sq_conf.num_es_stack_entries = 0; 862 break; 863 case CHIP_FAMILY_RV730: 864 case CHIP_FAMILY_RV740: 865 sq_conf.num_ps_gprs = 84; 866 sq_conf.num_vs_gprs = 36; 867 sq_conf.num_temp_gprs = 4; 868 sq_conf.num_gs_gprs = 0; 869 sq_conf.num_es_gprs = 0; 870 sq_conf.num_ps_threads = 188; 871 sq_conf.num_vs_threads = 60; 872 sq_conf.num_gs_threads = 0; 873 sq_conf.num_es_threads = 0; 874 sq_conf.num_ps_stack_entries = 128; 875 sq_conf.num_vs_stack_entries = 128; 876 sq_conf.num_gs_stack_entries = 0; 877 sq_conf.num_es_stack_entries = 0; 878 break; 879 case CHIP_FAMILY_RV710: 
        /* NOTE(review): this is the tail of a switch case whose head is above
         * this chunk; it fills in the SQ resource partitioning (GPRs, thread
         * counts, stack entries) for the chip family being initialized.
         * GS/ES resources are zeroed — only VS and PS are used here. */
        sq_conf.num_ps_gprs = 192;
        sq_conf.num_vs_gprs = 56;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 144;
        sq_conf.num_vs_threads = 48;
        sq_conf.num_gs_threads = 0;
        sq_conf.num_es_threads = 0;
        sq_conf.num_ps_stack_entries = 128;
        sq_conf.num_vs_stack_entries = 128;
        sq_conf.num_gs_stack_entries = 0;
        sq_conf.num_es_stack_entries = 0;
        break;
    }

    /* program the SQ with the partitioning chosen above */
    r600_sq_setup(pScrn, ib, &sq_conf);

    /* set fake reloc for unused depth; the 3 + 2 reservation covers the
     * register write plus the relocation entry */
    BEGIN_BATCH(3 + 2);
    EREG(ib, DB_DEPTH_INFO, 0);
    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
    END_BATCH();

    /* Default render state. NOTE: every BEGIN_BATCH(n) count below must
     * exactly match the dwords emitted by the EREG/PACK0/E32 calls inside
     * it — adjust the count when adding or removing a register write. */
    BEGIN_BATCH(80);
    if (info->ChipFamily < CHIP_FAMILY_RV770) {
        /* r6xx tuning values */
        EREG(ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) |
                               (28 << TD_FIFO_CREDIT_shift)));
        EREG(ib, VC_ENHANCE, 0);
        EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
        EREG(ib, DB_DEBUG, 0x82000000); /* ? */
        EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
                                 (16 << DEPTH_FLUSH_shift) |
                                 (0 << FORCE_SUMMARIZE_shift) |
                                 (4 << DEPTH_PENDING_FREE_shift) |
                                 (16 << DEPTH_CACHELINE_FREE_shift) |
                                 0));
    } else {
        /* r7xx tuning values; note the smaller gradient credit and
         * DEPTH_CACHELINE_FREE, and the VS_PC_LIMIT_ENABLE bit */
        EREG(ib, TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) |
                               (28 << TD_FIFO_CREDIT_shift)));
        EREG(ib, VC_ENHANCE, 0);
        EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
        EREG(ib, DB_DEBUG, 0);
        EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
                                 (16 << DEPTH_FLUSH_shift) |
                                 (0 << FORCE_SUMMARIZE_shift) |
                                 (4 << DEPTH_PENDING_FREE_shift) |
                                 (4 << DEPTH_CACHELINE_FREE_shift) |
                                 0));
    }

    /* zero both registers in the SQ_VTX_BASE_VTX_LOC group */
    PACK0(ib, SQ_VTX_BASE_VTX_LOC, 2);
    E32(ib, 0);
    E32(ib, 0);

    /* all ring itemsizes zeroed: the GS/ES and temp rings are unused by
     * this VS+PS-only pipeline */
    PACK0(ib, SQ_ESGS_RING_ITEMSIZE, 9);
    E32(ib, 0); // SQ_ESGS_RING_ITEMSIZE
    E32(ib, 0); // SQ_GSVS_RING_ITEMSIZE
    E32(ib, 0); // SQ_ESTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_GSTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_VSTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_PSTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_FBUF_RING_ITEMSIZE
    E32(ib, 0); // SQ_REDUC_RING_ITEMSIZE
    E32(ib, 0); // SQ_GS_VERT_ITEMSIZE

    // DB: depth test disabled, compression off
    EREG(ib, DB_DEPTH_CONTROL, 0);
    PACK0(ib, DB_RENDER_CONTROL, 2);
    E32(ib, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
    if (info->ChipFamily < CHIP_FAMILY_RV770)
        E32(ib, FORCE_SHADER_Z_ORDER_bit);
    else
        E32(ib, 0);
    EREG(ib, DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) |
                                (2 << ALPHA_TO_MASK_OFFSET1_shift) |
                                (2 << ALPHA_TO_MASK_OFFSET2_shift) |
                                (2 << ALPHA_TO_MASK_OFFSET3_shift)));
    EREG(ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
                                 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */

    PACK0(ib, DB_STENCIL_CLEAR, 2);
    E32(ib, 0); // DB_STENCIL_CLEAR
    E32(ib, 0); // DB_DEPTH_CLEAR

    PACK0(ib, DB_STENCILREFMASK, 3);
    E32(ib, 0); // DB_STENCILREFMASK
    E32(ib, 0); // DB_STENCILREFMASK_BF
    E32(ib, 0); // SX_ALPHA_REF

    PACK0(ib, CB_CLRCMP_CONTROL, 4);
    E32(ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
    E32(ib, 0); // CB_CLRCMP_SRC
    E32(ib, 0); // CB_CLRCMP_DST
    E32(ib, 0); // CB_CLRCMP_MSK

    /* only render target 0 / shader output 0 is used */
    EREG(ib, CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
    EREG(ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));

    /* alpha test off, blend constants zeroed */
    PACK0(ib, SX_ALPHA_TEST_CONTROL, 5);
    E32(ib, 0); // SX_ALPHA_TEST_CONTROL
    E32(ib, 0x00000000); // CB_BLEND_RED
    E32(ib, 0x00000000); // CB_BLEND_GREEN
    E32(ib, 0x00000000); // CB_BLEND_BLUE
    E32(ib, 0x00000000); // CB_BLEND_ALPHA

    EREG(ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) |
                                   (0 << WINDOW_Y_OFFSET_shift)));

    if (info->ChipFamily < CHIP_FAMILY_RV770)
        EREG(ib, R7xx_PA_SC_EDGERULE, 0x00000000);
    else
        EREG(ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA);

    EREG(ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask);

    END_BATCH();

    /* clip boolean is set to always visible -> doesn't matter;
     * open all clip rects and viewport scissors to the 8k x 8k maximum */
    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
        r600_set_clip_rect(pScrn, ib, i, 0, 0, 8192, 8192);

    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
        r600_set_vport_scissor(pScrn, ib, i, 0, 0, 8192, 8192);

    BEGIN_BATCH(42);
    PACK0(ib, PA_SC_MPASS_PS_CNTL, 2);
    E32(ib, 0);
    if (info->ChipFamily < CHIP_FAMILY_RV770)
        E32(ib, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
    else
        E32(ib, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
                 0x00500000)); /* ? */

    PACK0(ib, PA_SC_LINE_CNTL, 9);
    E32(ib, 0); // PA_SC_LINE_CNTL
    E32(ib, 0); // PA_SC_AA_CONFIG
    E32(ib, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL
             (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
    EFLOAT(ib, 1.0); // PA_CL_GB_VERT_CLIP_ADJ
    EFLOAT(ib, 1.0); // PA_CL_GB_VERT_DISC_ADJ
    EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ
    EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ
    E32(ib, 0); // PA_SC_AA_SAMPLE_LOCS_MCTX
    E32(ib, 0); // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M

    EREG(ib, PA_SC_AA_MASK, 0xFFFFFFFF);

    /* clipping disabled; vertices arrive as screen-space x/y (VTX_XY_FMT) */
    PACK0(ib, PA_CL_CLIP_CNTL, 5);
    E32(ib, CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
    E32(ib, FACE_bit); // PA_SU_SC_MODE_CNTL
    E32(ib, VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
    E32(ib, 0); // PA_CL_VS_OUT_CNTL
    E32(ib, 0); // PA_CL_NANINF_CNTL

    PACK0(ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
    E32(ib, 0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL
    E32(ib, 0); // PA_SU_POLY_OFFSET_CLAMP
    E32(ib, 0); // PA_SU_POLY_OFFSET_FRONT_SCALE
    E32(ib, 0); // PA_SU_POLY_OFFSET_FRONT_OFFSET
    E32(ib, 0); // PA_SU_POLY_OFFSET_BACK_SCALE
    E32(ib, 0); // PA_SU_POLY_OFFSET_BACK_OFFSET

    // SPI
    if (info->ChipFamily < CHIP_FAMILY_RV770)
        EREG(ib, R7xx_SPI_THREAD_GROUPING, 0);
    else
        EREG(ib, R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift));

    /* no Z input to the PS, fog disabled */
    PACK0(ib, SPI_INPUT_Z, 4);
    E32(ib, 0); // SPI_INPUT_Z
    E32(ib, 0); // SPI_FOG_CNTL
    E32(ib, 0); // SPI_FOG_FUNC_SCALE
    E32(ib, 0); // SPI_FOG_FUNC_BIAS

    END_BATCH();

    // clear FS
    fs_conf.bo = accel_state->shaders_bo;
    r600_fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM);

    // VGT
    BEGIN_BATCH(43);
    PACK0(ib, VGT_MAX_VTX_INDX, 4);
    E32(ib, 0xffffff); // VGT_MAX_VTX_INDX
    E32(ib, 0); // VGT_MIN_VTX_INDX
    E32(ib, 0); // VGT_INDX_OFFSET
    E32(ib, 0); // VGT_MULTI_PRIM_IB_RESET_INDX

    EREG(ib, VGT_PRIMITIVEID_EN, 0);
    EREG(ib, VGT_MULTI_PRIM_IB_RESET_EN, 0);

    PACK0(ib, VGT_INSTANCE_STEP_RATE_0, 2);
    E32(ib, 0); // VGT_INSTANCE_STEP_RATE_0
    E32(ib, 0); // VGT_INSTANCE_STEP_RATE_1

    PACK0(ib, PA_SU_POINT_SIZE, 17);
    E32(ib, 0); // PA_SU_POINT_SIZE
    E32(ib, 0); // PA_SU_POINT_MINMAX
    E32(ib, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
    E32(ib, 0); // PA_SC_LINE_STIPPLE
    E32(ib, 0); // VGT_OUTPUT_PATH_CNTL
    E32(ib, 0); // VGT_HOS_CNTL
    E32(ib, 0); // VGT_HOS_MAX_TESS_LEVEL
    E32(ib, 0); // VGT_HOS_MIN_TESS_LEVEL
    E32(ib, 0); // VGT_HOS_REUSE_DEPTH
    E32(ib, 0); // VGT_GROUP_PRIM_TYPE
    E32(ib, 0); // VGT_GROUP_FIRST_DECR
    E32(ib, 0); // VGT_GROUP_DECR
    E32(ib, 0); // VGT_GROUP_VECT_0_CNTL
    E32(ib, 0); // VGT_GROUP_VECT_1_CNTL
    E32(ib, 0); // VGT_GROUP_VECT_0_FMT_CNTL
    E32(ib, 0); // VGT_GROUP_VECT_1_FMT_CNTL
    E32(ib, 0); // VGT_GS_MODE

    PACK0(ib, VGT_STRMOUT_EN, 3);
    E32(ib, 0); // VGT_STRMOUT_EN
    E32(ib, 0); // VGT_REUSE_OFF
    E32(ib, 0); // VGT_VTX_CNT_EN

    EREG(ib, VGT_STRMOUT_BUFFER_EN, 0);
    END_BATCH();
}


/*
 * Commands
 */

/* Emit an immediate-mode draw (IT_DRAW_INDEX_IMMD): the indices are written
 * inline into the command stream rather than fetched from an index buffer.
 * 16-bit indices are packed two per dword (an odd trailing index gets a
 * dword to itself); 32-bit indices take one dword each.  `count` is the
 * packet payload size: 2 header dwords (num_indices + draw initiator) plus
 * the packed index dwords; it must match the BEGIN_BATCH(8 + count)
 * reservation above it. */
void
r600_draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
{
    RADEONInfoPtr info = RADEONPTR(pScrn); /* used by the batch macros */
    uint32_t i, count;

    // calculate num of packets
    count = 2;
    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
        count += (draw_conf->num_indices + 1) / 2; /* round up for odd counts */
    else
        count += draw_conf->num_indices;

    BEGIN_BATCH(8 + count);
    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
    PACK3(ib, IT_INDEX_TYPE, 1);
    E32(ib, draw_conf->index_type);
    PACK3(ib, IT_NUM_INSTANCES, 1);
    E32(ib, draw_conf->num_instances);

    PACK3(ib, IT_DRAW_INDEX_IMMD, count);
    E32(ib, draw_conf->num_indices);
    E32(ib, draw_conf->vgt_draw_initiator);

    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
        for (i = 0; i < draw_conf->num_indices; i += 2) {
            if ((i + 1) == draw_conf->num_indices)
                E32(ib, indices[i]); /* last odd index, upper half unused */
            else
                E32(ib, (indices[i] | (indices[i + 1] << 16)));
        }
    } else {
        for (i = 0; i < draw_conf->num_indices; i++)
            E32(ib, indices[i]);
    }
    END_BATCH();
}

/* Emit an auto-index draw (IT_DRAW_INDEX_AUTO): no index data is supplied;
 * num_indices vertices are drawn with generated indices. */
void
r600_draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn); /* used by the batch macros */

    BEGIN_BATCH(10);
    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
    PACK3(ib, IT_INDEX_TYPE, 1);
    E32(ib, draw_conf->index_type);
    PACK3(ib, IT_NUM_INSTANCES, 1);
    E32(ib, draw_conf->num_instances);
    PACK3(ib, IT_DRAW_INDEX_AUTO, 2);
    E32(ib, draw_conf->num_indices);
    E32(ib, draw_conf->vgt_draw_initiator);
    END_BATCH();
}

/* Finish the pending accel operation: turn the vertex data accumulated in
 * the VBO since vb_start_op into a RECTLIST auto-index draw, wait for the
 * 3D engine to go idle, flush the destination surface caches, and submit
 * the indirect buffer (under KMS with multi-op batching enabled the flush
 * is deferred to the CS machinery instead).
 *
 * vtx_size is the per-vertex size in bytes; vb_start_op == -1 marks "no
 * operation pending" and is restored here once the op is emitted. */
void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    draw_config_t draw_conf;
    vtx_resource_t vtx_res;

    /* nothing pending */
    if (accel_state->vbo.vb_start_op == -1)
        return;

    CLEAR (draw_conf);
    CLEAR (vtx_res);

    /* no vertices were actually emitted: throw the IB away */
    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
        R600IBDiscard(pScrn, accel_state->ib);
        return;
    }

    /* Vertex buffer setup: expose the bytes written since vb_start_op as
     * the VS vertex resource (sizes are in dwords, addresses in bytes) */
    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
    vtx_res.id = SQ_VTX_RESOURCE_vs;
    vtx_res.vtx_size_dw = vtx_size / 4;
    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
    vtx_res.mem_req_size = 1;
    vtx_res.vb_addr = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
    vtx_res.bo = accel_state->vbo.vb_bo;
    r600_set_vtx_resource(pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);

    /* Draw: one rect per vertex group, auto-generated indices */
    draw_conf.prim_type = DI_PT_RECTLIST;
    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
    draw_conf.num_instances = 1;
    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;

    r600_draw_auto(pScrn, accel_state->ib, &draw_conf);

    /* XXX drm should handle this in fence submit */
    r600_wait_3d_idle_clean(pScrn, accel_state->ib);

    /* sync dst surface: flush/invalidate the CB cache for the dst bo */
    r600_cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
                             accel_state->dst_size, accel_state->dst_obj.offset,
                             accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);

    accel_state->vbo.vb_start_op = -1;
    accel_state->ib_reset_op = 0;

#if KMS_MULTI_OP
    /* under KMS, batch multiple ops per IB; flush happens elsewhere */
    if (!info->cs)
#endif
        R600CPFlushIndirect(pScrn, accel_state->ib);
}