r6xx_accel.c revision c73da4db
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Alex Deucher <alexander.deucher@amd.com>
 *          Matthias Hopf <mhopf@suse.de>
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "xf86.h"

#include <errno.h>

#include "radeon.h"
#include "r600_shader.h"
#include "radeon_reg.h"
#include "r600_reg.h"
#include "r600_state.h"

#include "radeon_drm.h"
#include "radeon_vbo.h"
#include "radeon_exa_shared.h"

static const uint32_t R600_ROP[16] = {
    RADEON_ROP3_ZERO, /* GXclear        */
    RADEON_ROP3_DSa,  /* GXand          */
    RADEON_ROP3_SDna, /* GXandReverse   */
    RADEON_ROP3_S,    /* GXcopy         */
    RADEON_ROP3_DSna, /* GXandInverted  */
    RADEON_ROP3_D,    /* GXnoop         */
    RADEON_ROP3_DSx,  /* GXxor          */
    RADEON_ROP3_DSo,  /* GXor           */
    RADEON_ROP3_DSon, /* GXnor          */
    RADEON_ROP3_DSxn, /* GXequiv        */
    RADEON_ROP3_Dn,   /* GXinvert       */
    RADEON_ROP3_SDno, /* GXorReverse    */
    RADEON_ROP3_Sn,   /* GXcopyInverted */
    RADEON_ROP3_DSno, /* GXorInverted   */
    RADEON_ROP3_DSan, /* GXnand         */
    RADEON_ROP3_ONE,  /* GXset          */
};

/* we try to batch operations together under KMS -
 * but it doesn't work yet without misrendering */
#define KMS_MULTI_OP 1

/* Flush the indirect buffer to the kernel for submission to the card */
void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    drmBufPtr buffer = ib;
    int start = 0;
    drm_radeon_indirect_t indirect;

#if defined(XF86DRM_MODE)
    if (info->cs) {
        radeon_cs_flush_indirect(pScrn);
        return;
    }
#endif

    if (!buffer) return;

    //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
    //           buffer->idx);

    while (buffer->used & 0x3c) {
        BEGIN_BATCH(1);
        E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */
        END_BATCH();
    }

    info->accel_state->vbo.vb_offset = 0;
    info->accel_state->vbo.vb_start_op = -1;

    //ErrorF("buffer bytes: %d\n", buffer->used);

    indirect.idx = buffer->idx;
    indirect.start = start;
    indirect.end = buffer->used;
    indirect.discard = 1;

    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
                        &indirect, sizeof(drm_radeon_indirect_t));
}

void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
{
#if defined(XF86DRM_MODE)
    RADEONInfoPtr info = RADEONPTR(pScrn);
    if (info->cs) {
        radeon_ib_discard(pScrn);
    }
#endif
    if (!ib) return;

    ib->used = 0;
    R600CPFlushIndirect(pScrn, ib);
}

void
r600_wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    // flush caches, don't generate timestamp
    BEGIN_BATCH(5);
    PACK3(ib, IT_EVENT_WRITE, 1);
    E32(ib, CACHE_FLUSH_AND_INV_EVENT);
    // wait for 3D idle clean
    EREG(ib, WAIT_UNTIL, (WAIT_3D_IDLE_bit |
                          WAIT_3D_IDLECLEAN_bit));
    END_BATCH();
}

void
r600_wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(3);
    EREG(ib, WAIT_UNTIL, WAIT_3D_IDLE_bit);
    END_BATCH();
}

void
r600_start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    if (info->ChipFamily < CHIP_FAMILY_RV770) {
        BEGIN_BATCH(5);
        PACK3(ib, IT_START_3D_CMDBUF, 1);
        E32(ib, 0);
    } else
        BEGIN_BATCH(3);

    PACK3(ib, IT_CONTEXT_CONTROL, 2);
    E32(ib, 0x80000000);
    E32(ib, 0x80000000);
    END_BATCH();
}

/*
 * Setup of functional groups
 */

// asic stack/thread/gpr limits - need to query the drm
static void
r600_sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
{
    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
    RADEONInfoPtr info = RADEONPTR(pScrn);

    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
        (info->ChipFamily == CHIP_FAMILY_RV620) ||
        (info->ChipFamily == CHIP_FAMILY_RS780) ||
        (info->ChipFamily == CHIP_FAMILY_RS880) ||
        (info->ChipFamily == CHIP_FAMILY_RV710))
        sq_config = 0; // no VC
    else
        sq_config = VC_ENABLE_bit;

    sq_config |= (DX9_CONSTS_bit |
                  ALU_INST_PREFER_VECTOR_bit |
                  (sq_conf->ps_prio << PS_PRIO_shift) |
                  (sq_conf->vs_prio << VS_PRIO_shift) |
                  (sq_conf->gs_prio << GS_PRIO_shift) |
                  (sq_conf->es_prio << ES_PRIO_shift));

    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
                              (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
                              (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
                              (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));

    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
                               (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
                               (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
                               (sq_conf->num_es_threads << NUM_ES_THREADS_shift));

    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
                                (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));

    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
                                (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));

    BEGIN_BATCH(8);
    PACK0(ib, SQ_CONFIG, 6);
    E32(ib, sq_config);
    E32(ib, sq_gpr_resource_mgmt_1);
    E32(ib, sq_gpr_resource_mgmt_2);
    E32(ib, sq_thread_resource_mgmt);
    E32(ib, sq_stack_resource_mgmt_1);
    E32(ib, sq_stack_resource_mgmt_2);
    END_BATCH();
}

void
r600_set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain)
{
    uint32_t cb_color_info, cb_color_control;
    unsigned pitch, slice, h, array_mode;
    RADEONInfoPtr info = RADEONPTR(pScrn);

#if defined(XF86DRM_MODE)
    if (info->cs && cb_conf->surface) {
        switch (cb_conf->surface->level[0].mode) {
        case RADEON_SURF_MODE_1D:
            array_mode = 2;
            break;
        case RADEON_SURF_MODE_2D:
            array_mode = 4;
            break;
        default:
            array_mode = 0;
            break;
        }
        pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
        slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
    } else
#endif
    {
        array_mode = cb_conf->array_mode;
        pitch = (cb_conf->w / 8) - 1;
        h = RADEON_ALIGN(cb_conf->h, 8);
        slice = ((cb_conf->w * h) / 64) - 1;
    }

    cb_color_info = ((cb_conf->endian << ENDIAN_shift) |
                     (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) |
                     (array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) |
                     (cb_conf->number_type << NUMBER_TYPE_shift) |
                     (cb_conf->comp_swap << COMP_SWAP_shift) |
                     (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift));
    if (cb_conf->read_size)
        cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
    if (cb_conf->blend_clamp)
        cb_color_info |= BLEND_CLAMP_bit;
    if (cb_conf->clear_color)
        cb_color_info |= CLEAR_COLOR_bit;
    if (cb_conf->blend_bypass)
        cb_color_info |= BLEND_BYPASS_bit;
    if (cb_conf->blend_float32)
        cb_color_info |= BLEND_FLOAT32_bit;
    if (cb_conf->simple_float)
        cb_color_info |= SIMPLE_FLOAT_bit;
    if (cb_conf->round_mode)
        cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
    if (cb_conf->tile_compact)
        cb_color_info |= TILE_COMPACT_bit;
    if (cb_conf->source_format)
        cb_color_info |= SOURCE_FORMAT_bit;

    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
    RELOC_BATCH(cb_conf->bo, 0, domain);
    END_BATCH();

    // rv6xx workaround
    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
        (info->ChipFamily < CHIP_FAMILY_RV770)) {
        BEGIN_BATCH(2);
        PACK3(ib, IT_SURFACE_BASE_UPDATE, 1);
        E32(ib, (2 << cb_conf->id));
        END_BATCH();
    }
    /* Set the CMASK & TILE buffers to the offset of the color buffer;
     * as we don't use them, this shouldn't cause any issue and we
     * then have a valid cmd stream
     */
    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256
    RELOC_BATCH(cb_conf->bo, 0, domain);
    END_BATCH();
    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256
    RELOC_BATCH(cb_conf->bo, 0, domain);
    END_BATCH();
    BEGIN_BATCH(9);
    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
    EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) |
                                                    (slice << SLICE_TILE_MAX_shift)));
    EREG(ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) |
                                                    (0 << SLICE_MAX_shift)));
    EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) |
                                                    (0 << FMASK_TILE_MAX_shift)));
    END_BATCH();

    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
    RELOC_BATCH(cb_conf->bo, 0, domain);
    END_BATCH();

    BEGIN_BATCH(9);
    EREG(ib, CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift));
    cb_color_control = R600_ROP[cb_conf->rop] |
        (cb_conf->blend_enable << TARGET_BLEND_ENABLE_shift);
    if (info->ChipFamily == CHIP_FAMILY_R600) {
        /* no per-MRT blend on R600 */
        EREG(ib, CB_COLOR_CONTROL, cb_color_control);
        EREG(ib, CB_BLEND_CONTROL, cb_conf->blendcntl);
    } else {
        if (cb_conf->blend_enable)
            cb_color_control |= PER_MRT_BLEND_bit;
        EREG(ib, CB_COLOR_CONTROL, cb_color_control);
        EREG(ib, CB_BLEND0_CONTROL, cb_conf->blendcntl);
    }
    END_BATCH();
}

static void
r600_cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type,
                         uint32_t size, uint64_t mc_addr,
                         struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t cp_coher_size;
    if (size == 0xffffffff)
        cp_coher_size = 0xffffffff;
    else
        cp_coher_size = ((size + 255) >> 8);

    BEGIN_BATCH(5 + 2);
    PACK3(ib, IT_SURFACE_SYNC, 4);
    E32(ib, sync_type);
    E32(ib, cp_coher_size);
    E32(ib, (mc_addr >> 8));
    E32(ib, 10); /* poll interval */
    RELOC_BATCH(bo, rdomains, wdomain);
    END_BATCH();
}

/* inserts a wait for vline in the command stream */
void
r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
                        xf86CrtcPtr crtc, int start, int stop)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t offset;

    if (!crtc)
        return;

    if (!crtc->enabled)
        return;

    if (info->cs) {
        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
            return;
    } else {
#ifdef USE_EXA
        if (info->useEXA)
            offset = exaGetPixmapOffset(pPix);
        else
#endif
            offset = pPix->devPrivate.ptr - info->FB;

        /* if drawing to front buffer */
        if (offset != 0)
            return;
    }

    start = max(start, crtc->y);
    stop = min(stop, crtc->y + crtc->mode.VDisplay);

    if (start >= stop)
        return;

#if defined(XF86DRM_MODE)
    if (info->cs) {
        drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;

        BEGIN_BATCH(11);
        /* set the VLINE range */
        EREG(ib, AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */
             (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
             (stop << AVIVO_D1MODE_VLINE_END_SHIFT));

        /* tell the CP to poll the VLINE state register */
        PACK3(ib, IT_WAIT_REG_MEM, 6);
        E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
        E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS));
        E32(ib, 0);
        E32(ib, 0);                       // Ref value
        E32(ib, AVIVO_D1MODE_VLINE_STAT); // Mask
        E32(ib, 10);                      // Wait interval
        /* add crtc reloc */
        PACK3(ib, IT_NOP, 1);
        E32(ib, drmmode_crtc->mode_crtc->crtc_id);
        END_BATCH();
    } else
#endif
    {
        RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;

        BEGIN_BATCH(9);
        /* set the VLINE range */
        EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset,
             (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
             (stop << AVIVO_D1MODE_VLINE_END_SHIFT));

        /* tell the CP to poll the VLINE state register */
        PACK3(ib, IT_WAIT_REG_MEM, 6);
        E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
        E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset));
        E32(ib, 0);
        E32(ib, 0);                       // Ref value
        E32(ib, AVIVO_D1MODE_VLINE_STAT); // Mask
        E32(ib, 10);                      // Wait interval
        END_BATCH();
    }
}

void
r600_set_spi(ScrnInfoPtr pScrn, drmBufPtr ib, int vs_export_count, int num_interp)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(8);
    /* Interpolator setup */
    EREG(ib, SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
    PACK0(ib, SPI_PS_IN_CONTROL_0, 3);
    E32(ib, (num_interp << NUM_INTERP_shift));
    E32(ib, 0);
    E32(ib, 0);
    END_BATCH();
}

void
r600_fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_pgm_resources;

    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
                        (fs_conf->stack_size << STACK_SIZE_shift));

    if (fs_conf->dx10_clamp)
        sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;

    BEGIN_BATCH(3 + 2);
    EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
    RELOC_BATCH(fs_conf->bo, domain, 0);
    END_BATCH();

    BEGIN_BATCH(6);
    EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources);
    EREG(ib, SQ_PGM_CF_OFFSET_FS, 0);
    END_BATCH();
}

void
r600_vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_pgm_resources;

    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
                        (vs_conf->stack_size << STACK_SIZE_shift));

    if (vs_conf->dx10_clamp)
        sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
    if (vs_conf->fetch_cache_lines)
        sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
    if (vs_conf->uncached_first_inst)
        sq_pgm_resources |= UNCACHED_FIRST_INST_bit;

    /* flush SQ cache */
    r600_cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
                             vs_conf->shader_size, vs_conf->shader_addr,
                             vs_conf->bo, domain, 0);

    BEGIN_BATCH(3 + 2);
    EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
    RELOC_BATCH(vs_conf->bo, domain, 0);
    END_BATCH();

    BEGIN_BATCH(6);
    EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources);
    EREG(ib, SQ_PGM_CF_OFFSET_VS, 0);
    END_BATCH();
}

void
r600_ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_pgm_resources;

    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
                        (ps_conf->stack_size << STACK_SIZE_shift));

    if (ps_conf->dx10_clamp)
        sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
    if (ps_conf->fetch_cache_lines)
        sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
    if (ps_conf->uncached_first_inst)
        sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
    if (ps_conf->clamp_consts)
        sq_pgm_resources |= CLAMP_CONSTS_bit;

    /* flush SQ cache */
    r600_cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
                             ps_conf->shader_size, ps_conf->shader_addr,
                             ps_conf->bo, domain, 0);

    BEGIN_BATCH(3 + 2);
    EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
    RELOC_BATCH(ps_conf->bo, domain, 0);
    END_BATCH();

    BEGIN_BATCH(9);
    EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources);
    EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
    EREG(ib, SQ_PGM_CF_OFFSET_PS, 0);
    END_BATCH();
}

void
r600_set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    int i;
    const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2);

    BEGIN_BATCH(2 + countreg);
    PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg);
    for (i = 0; i < countreg; i++)
        EFLOAT(ib, const_buf[i]);
    END_BATCH();
}

void
r600_set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    /* bool register order is: ps, vs, gs; one register each
     * 1 bit per bool; 32 bools each for ps, vs, gs.
     */
    BEGIN_BATCH(3);
    EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
    END_BATCH();
}

static void
r600_set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    uint32_t sq_vtx_constant_word2;

    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
                             ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
                             (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
                             (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
                             (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
    if (res->clamp_x)
        sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;

    if (res->format_comp_all)
        sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;

    if (res->srf_mode_all)
        sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;

    /* flush vertex cache */
    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
        (info->ChipFamily == CHIP_FAMILY_RV620) ||
        (info->ChipFamily == CHIP_FAMILY_RS780) ||
        (info->ChipFamily == CHIP_FAMILY_RS880) ||
        (info->ChipFamily == CHIP_FAMILY_RV710))
        r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
                                 accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
                                 res->bo,
                                 domain, 0);
    else
        r600_cp_set_surface_sync(pScrn, ib, VC_ACTION_ENA_bit,
                                 accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
                                 res->bo,
                                 domain, 0);

    BEGIN_BATCH(9 + 2);
    PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
    E32(ib, res->vb_addr & 0xffffffff);                   // 0: BASE_ADDRESS
    E32(ib, (res->vtx_num_entries << 2) - 1);             // 1: SIZE
    E32(ib, sq_vtx_constant_word2);                       // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
    E32(ib, res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!?
    E32(ib, 0);                                           // 4: n/a
    E32(ib, 0);                                           // 5: n/a
    E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE
    RELOC_BATCH(res->bo, domain, 0);
    END_BATCH();
}

void
r600_set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
    uint32_t sq_tex_resource_word5, sq_tex_resource_word6;
    uint32_t array_mode, pitch;

#if defined(XF86DRM_MODE)
    if (info->cs && tex_res->surface) {
        switch (tex_res->surface->level[0].mode) {
        case RADEON_SURF_MODE_1D:
            array_mode = 2;
            break;
        case RADEON_SURF_MODE_2D:
            array_mode = 4;
            break;
        default:
            array_mode = 0;
            break;
        }
        pitch = tex_res->surface->level[0].nblk_x >> 3;
    } else
#endif
    {
        array_mode = tex_res->tile_mode;
        pitch = (tex_res->pitch + 7) >> 3;
    }

    sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
                             (array_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));

    if (tex_res->w)
        sq_tex_resource_word0 |= (((pitch - 1) << PITCH_shift) |
                                  ((tex_res->w - 1) << TEX_WIDTH_shift));

    if (tex_res->tile_type)
        sq_tex_resource_word0 |= TILE_TYPE_bit;

    sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);

    if (tex_res->h)
        sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
    if (tex_res->depth)
        sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);

    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
                             (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
                             (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
                             (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
                             (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
                             (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
                             (tex_res->request_size << REQUEST_SIZE_shift) |
                             (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
                             (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
                             (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
                             (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
                             (tex_res->base_level << BASE_LEVEL_shift));

    if (tex_res->srf_mode_all)
        sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
    if (tex_res->force_degamma)
        sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;

    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
                             (tex_res->base_array << BASE_ARRAY_shift) |
                             (tex_res->last_array << LAST_ARRAY_shift));

    sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
                             (tex_res->perf_modulation << PERF_MODULATION_shift) |
                             (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));

    if (tex_res->interlaced)
        sq_tex_resource_word6 |= INTERLACED_bit;

    /* flush texture cache */
    r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
                             tex_res->size, tex_res->base,
                             tex_res->bo, domain, 0);

    BEGIN_BATCH(9 + 4);
    PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
    E32(ib, sq_tex_resource_word0);
    E32(ib, sq_tex_resource_word1);
    E32(ib, ((tex_res->base) >> 8));
    E32(ib, ((tex_res->mip_base) >> 8));
    E32(ib, sq_tex_resource_word4);
    E32(ib, sq_tex_resource_word5);
    E32(ib, sq_tex_resource_word6);
    RELOC_BATCH(tex_res->bo, domain, 0);
    RELOC_BATCH(tex_res->mip_bo, domain, 0);
    END_BATCH();
}

void
r600_set_tex_sampler(ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;

    sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) |
                            (s->clamp_y << CLAMP_Y_shift) |
                            (s->clamp_z << CLAMP_Z_shift) |
                            (s->xy_mag_filter << XY_MAG_FILTER_shift) |
                            (s->xy_min_filter << XY_MIN_FILTER_shift) |
                            (s->z_filter << Z_FILTER_shift) |
                            (s->mip_filter << MIP_FILTER_shift) |
                            (s->border_color << BORDER_COLOR_TYPE_shift) |
                            (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) |
                            (s->chroma_key << CHROMA_KEY_shift));
    if (s->point_sampling_clamp)
        sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
    if (s->tex_array_override)
        sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
    if (s->lod_uses_minor_axis)
        sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;

    sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) |
                            (s->max_lod << MAX_LOD_shift) |
                            (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));

    sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) |
                            (s->perf_mip << PERF_MIP_shift) |
                            (s->perf_z << PERF_Z_shift));
    if (s->mc_coord_truncate)
        sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
    if (s->force_degamma)
        sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
    if (s->high_precision_filter)
        sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
    if (s->fetch_4)
        sq_tex_sampler_word2 |= FETCH_4_bit;
    if (s->sample_is_pcf)
        sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
    if (s->type)
        sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;

    BEGIN_BATCH(5);
    PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
    E32(ib, sq_tex_sampler_word0);
    E32(ib, sq_tex_sampler_word1);
    E32(ib, sq_tex_sampler_word2);
    END_BATCH();
}

//XXX deal with clip offsets in clip setup
void
r600_set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(4);
    PACK0(ib, PA_SC_SCREEN_SCISSOR_TL, 2);
    E32(ib, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
             (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
    E32(ib, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
             (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
    END_BATCH();
}

void
r600_set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(4);
    PACK0(ib, PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
    E32(ib, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
             (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
             WINDOW_OFFSET_DISABLE_bit));
    E32(ib, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
             (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
    END_BATCH();
}

void
r600_set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(4);
    PACK0(ib, PA_SC_GENERIC_SCISSOR_TL, 2);
    E32(ib, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
             (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
             WINDOW_OFFSET_DISABLE_bit));
    E32(ib, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
             (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift)));
    END_BATCH();
}

void
r600_set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(4);
    PACK0(ib, PA_SC_WINDOW_SCISSOR_TL, 2);
    E32(ib, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
             (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
             WINDOW_OFFSET_DISABLE_bit));
    E32(ib, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
             (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
    END_BATCH();
}

void
r600_set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(4);
    PACK0(ib, PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
    E32(ib, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
             (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
    E32(ib, ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
             (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
    END_BATCH();
}

/*
 * Setup of default state
 */

void
r600_set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    tex_resource_t tex_res;
    shader_config_t fs_conf;
    sq_config_t sq_conf;
    int i;
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    if (accel_state->XInited3D)
        return;

    memset(&tex_res, 0, sizeof(tex_resource_t));
    memset(&fs_conf, 0, sizeof(shader_config_t));

    accel_state->XInited3D = TRUE;

    r600_start_3d(pScrn, accel_state->ib);

    // SQ
    sq_conf.ps_prio = 0;
    sq_conf.vs_prio = 1;
    sq_conf.gs_prio = 2;
    sq_conf.es_prio = 3;
    // need to set stack/thread/gpr limits based on the asic
    // for now just set them low enough so any card will work
    // see r600_cp.c in the drm
    switch (info->ChipFamily) {
    case CHIP_FAMILY_R600:
        sq_conf.num_ps_gprs = 192;
        sq_conf.num_vs_gprs = 56;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 136;
        sq_conf.num_vs_threads = 48;
        sq_conf.num_gs_threads = 4;
        sq_conf.num_es_threads = 4;
        sq_conf.num_ps_stack_entries = 128;
        sq_conf.num_vs_stack_entries = 128;
        sq_conf.num_gs_stack_entries = 0;
        sq_conf.num_es_stack_entries = 0;
        break;
    case CHIP_FAMILY_RV630:
    case CHIP_FAMILY_RV635:
        sq_conf.num_ps_gprs = 84;
        sq_conf.num_vs_gprs = 36;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 144;
        sq_conf.num_vs_threads = 40;
        sq_conf.num_gs_threads = 4;
        sq_conf.num_es_threads = 4;
        sq_conf.num_ps_stack_entries = 40;
        sq_conf.num_vs_stack_entries = 40;
        sq_conf.num_gs_stack_entries = 32;
        sq_conf.num_es_stack_entries = 16;
        break;
    case CHIP_FAMILY_RV610:
    case CHIP_FAMILY_RV620:
    case CHIP_FAMILY_RS780:
    case CHIP_FAMILY_RS880:
    default:
        sq_conf.num_ps_gprs = 84;
        sq_conf.num_vs_gprs = 36;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 136;
        sq_conf.num_vs_threads = 48;
        sq_conf.num_gs_threads = 4;
        sq_conf.num_es_threads = 4;
        sq_conf.num_ps_stack_entries = 40;
        sq_conf.num_vs_stack_entries = 40;
        sq_conf.num_gs_stack_entries = 32;
        sq_conf.num_es_stack_entries = 16;
        break;
    case CHIP_FAMILY_RV670:
        sq_conf.num_ps_gprs = 144;
        sq_conf.num_vs_gprs = 40;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 136;
        sq_conf.num_vs_threads = 48;
        sq_conf.num_gs_threads = 4;
        sq_conf.num_es_threads = 4;
        sq_conf.num_ps_stack_entries = 40;
        sq_conf.num_vs_stack_entries = 40;
        sq_conf.num_gs_stack_entries = 32;
        sq_conf.num_es_stack_entries = 16;
        break;
    case CHIP_FAMILY_RV770:
        sq_conf.num_ps_gprs = 192;
        sq_conf.num_vs_gprs = 56;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 188;
        sq_conf.num_vs_threads = 60;
        sq_conf.num_gs_threads = 0;
        sq_conf.num_es_threads = 0;
        sq_conf.num_ps_stack_entries = 256;
        sq_conf.num_vs_stack_entries = 256;
        sq_conf.num_gs_stack_entries = 0;
        sq_conf.num_es_stack_entries = 0;
        break;
    case CHIP_FAMILY_RV730:
    case CHIP_FAMILY_RV740:
        sq_conf.num_ps_gprs = 84;
        sq_conf.num_vs_gprs = 36;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 188;
        sq_conf.num_vs_threads = 60;
        sq_conf.num_gs_threads = 0;
        sq_conf.num_es_threads = 0;
        sq_conf.num_ps_stack_entries = 128;
        sq_conf.num_vs_stack_entries = 128;
        sq_conf.num_gs_stack_entries = 0;
        sq_conf.num_es_stack_entries = 0;
        break;
    case CHIP_FAMILY_RV710:
        sq_conf.num_ps_gprs = 192;
        sq_conf.num_vs_gprs = 56;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 144;
        sq_conf.num_vs_threads = 48;
        sq_conf.num_gs_threads = 0;
        sq_conf.num_es_threads = 0;
        sq_conf.num_ps_stack_entries = 128;
        sq_conf.num_vs_stack_entries = 128;
        sq_conf.num_gs_stack_entries = 0;
        sq_conf.num_es_stack_entries = 0;
        break;
    }

    r600_sq_setup(pScrn, ib, &sq_conf);

    /* set fake reloc for unused depth */
    BEGIN_BATCH(3 + 2);
    EREG(ib, DB_DEPTH_INFO, 0);
    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
    END_BATCH();

    BEGIN_BATCH(80);
    if (info->ChipFamily < CHIP_FAMILY_RV770) {
        EREG(ib, TA_CNTL_AUX, ((3 << GRADIENT_CREDIT_shift) |
                               (28 << TD_FIFO_CREDIT_shift)));
        EREG(ib, VC_ENHANCE, 0);
        EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
        EREG(ib, DB_DEBUG, 0x82000000); /* ? */
        EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
                                 (16 << DEPTH_FLUSH_shift) |
                                 (0 << FORCE_SUMMARIZE_shift) |
                                 (4 << DEPTH_PENDING_FREE_shift) |
                                 (16 << DEPTH_CACHELINE_FREE_shift) |
                                 0));
    } else {
        EREG(ib, TA_CNTL_AUX, ((2 << GRADIENT_CREDIT_shift) |
                               (28 << TD_FIFO_CREDIT_shift)));
        EREG(ib, VC_ENHANCE, 0);
        EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
        EREG(ib, DB_DEBUG, 0);
        EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
                                 (16 << DEPTH_FLUSH_shift) |
                                 (0 << FORCE_SUMMARIZE_shift) |
                                 (4 << DEPTH_PENDING_FREE_shift) |
                                 (4 << DEPTH_CACHELINE_FREE_shift) |
                                 0));
    }

    PACK0(ib, SQ_VTX_BASE_VTX_LOC, 2);
    E32(ib, 0);
    E32(ib, 0);

    PACK0(ib, SQ_ESGS_RING_ITEMSIZE, 9);
    E32(ib, 0); // SQ_ESGS_RING_ITEMSIZE
    E32(ib, 0); // SQ_GSVS_RING_ITEMSIZE
    E32(ib, 0); // SQ_ESTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_GSTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_VSTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_PSTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_FBUF_RING_ITEMSIZE
    E32(ib, 0); // SQ_REDUC_RING_ITEMSIZE
    E32(ib, 0); // SQ_GS_VERT_ITEMSIZE

    // DB
    EREG(ib, DB_DEPTH_CONTROL, 0);
    PACK0(ib, DB_RENDER_CONTROL, 2);
    E32(ib, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
    if (info->ChipFamily < CHIP_FAMILY_RV770)
        E32(ib, FORCE_SHADER_Z_ORDER_bit);
    else
        E32(ib, 0);
    EREG(ib, DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) |
                                (2 << ALPHA_TO_MASK_OFFSET1_shift) |
                                (2 << ALPHA_TO_MASK_OFFSET2_shift) |
                                (2 << ALPHA_TO_MASK_OFFSET3_shift)));
    EREG(ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) |   /* EARLY_Z_THEN_LATE_Z */
                                 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */

    PACK0(ib, DB_STENCIL_CLEAR, 2);
    E32(ib, 0); // DB_STENCIL_CLEAR
    E32(ib, 0); // DB_DEPTH_CLEAR

    PACK0(ib, DB_STENCILREFMASK, 3);
    E32(ib, 0); // DB_STENCILREFMASK
    E32(ib, 0); // DB_STENCILREFMASK_BF
    E32(ib, 0); // SX_ALPHA_REF

    PACK0(ib, CB_CLRCMP_CONTROL, 4);
    E32(ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
    E32(ib, 0); // CB_CLRCMP_SRC
    E32(ib, 0); // CB_CLRCMP_DST
    E32(ib, 0); // CB_CLRCMP_MSK

    EREG(ib, CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
    EREG(ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));

    PACK0(ib, SX_ALPHA_TEST_CONTROL, 5);
    E32(ib, 0);          // SX_ALPHA_TEST_CONTROL
    E32(ib, 0x00000000); // CB_BLEND_RED
    E32(ib, 0x00000000); // CB_BLEND_GREEN
    E32(ib, 0x00000000); // CB_BLEND_BLUE
    E32(ib, 0x00000000); // CB_BLEND_ALPHA

    EREG(ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) |
                                   (0 << WINDOW_Y_OFFSET_shift)));

    if (info->ChipFamily < CHIP_FAMILY_RV770)
        EREG(ib, R7xx_PA_SC_EDGERULE, 0x00000000);
    else
        EREG(ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA);

    EREG(ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask);

    END_BATCH();

    /* clip boolean is set to always visible -> doesn't matter */
    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
        r600_set_clip_rect(pScrn, ib, i, 0, 0, 8192, 8192);

    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
        r600_set_vport_scissor(pScrn, ib, i, 0, 0, 8192, 8192);

    BEGIN_BATCH(49);
    PACK0(ib, PA_SC_MPASS_PS_CNTL, 2);
    E32(ib, 0);
    if (info->ChipFamily < CHIP_FAMILY_RV770)
        E32(ib, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
    else
        E32(ib, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
                 0x00500000)); /* ? */

    PACK0(ib, PA_SC_LINE_CNTL, 9);
    E32(ib, 0); // PA_SC_LINE_CNTL
    E32(ib, 0); // PA_SC_AA_CONFIG
    E32(ib, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL
             (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
    EFLOAT(ib, 1.0); // PA_CL_GB_VERT_CLIP_ADJ
    EFLOAT(ib, 1.0); // PA_CL_GB_VERT_DISC_ADJ
    EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ
    EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ
    E32(ib, 0); // PA_SC_AA_SAMPLE_LOCS_MCTX
    E32(ib, 0); // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M

    EREG(ib, PA_SC_AA_MASK, 0xFFFFFFFF);

    PACK0(ib, PA_CL_CLIP_CNTL, 5);
    E32(ib, CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
    E32(ib, FACE_bit);         // PA_SU_SC_MODE_CNTL
    E32(ib, VTX_XY_FMT_bit);   // PA_CL_VTE_CNTL
    E32(ib, 0);                // PA_CL_VS_OUT_CNTL
    E32(ib, 0);                // PA_CL_NANINF_CNTL

    PACK0(ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
    E32(ib, 0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL
    E32(ib, 0); // PA_SU_POLY_OFFSET_CLAMP
    E32(ib, 0); // PA_SU_POLY_OFFSET_FRONT_SCALE
    E32(ib, 0); // PA_SU_POLY_OFFSET_FRONT_OFFSET
    E32(ib, 0); // PA_SU_POLY_OFFSET_BACK_SCALE
    E32(ib, 0); // PA_SU_POLY_OFFSET_BACK_OFFSET

    // SPI
    if (info->ChipFamily < CHIP_FAMILY_RV770)
        EREG(ib, R7xx_SPI_THREAD_GROUPING, 0);
    else
        EREG(ib, R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift));

    /* default Interpolator setup */
    EREG(ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
                               (1 << SEMANTIC_1_shift)));
    PACK0(ib, SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
    E32(ib, ((0 << SEMANTIC_shift) |
             (0x01 << DEFAULT_VAL_shift) |
             SEL_CENTROID_bit));
    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
    E32(ib, ((1 << SEMANTIC_shift) |
             (0x01 << DEFAULT_VAL_shift) |
             SEL_CENTROID_bit));

    PACK0(ib, SPI_INPUT_Z, 4);
    E32(ib, 0); // SPI_INPUT_Z
    E32(ib, 0); // SPI_FOG_CNTL
    E32(ib, 0); // SPI_FOG_FUNC_SCALE
    E32(ib, 0); // SPI_FOG_FUNC_BIAS

    END_BATCH();

    // clear FS
    fs_conf.bo = accel_state->shaders_bo;
    r600_fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM);

    // VGT
    BEGIN_BATCH(46);
    PACK0(ib, VGT_MAX_VTX_INDX, 4);
    E32(ib, 0xffffff); // VGT_MAX_VTX_INDX
    E32(ib, 0);        // VGT_MIN_VTX_INDX
    E32(ib, 0);        // VGT_INDX_OFFSET
    E32(ib, 0);        // VGT_MULTI_PRIM_IB_RESET_INDX

    EREG(ib, VGT_PRIMITIVEID_EN, 0);
    EREG(ib, VGT_MULTI_PRIM_IB_RESET_EN, 0);

    PACK0(ib, VGT_INSTANCE_STEP_RATE_0, 2);
    E32(ib, 0); // VGT_INSTANCE_STEP_RATE_0
    E32(ib, 0); // VGT_INSTANCE_STEP_RATE_1

    PACK0(ib, PA_SU_POINT_SIZE, 17);
    E32(ib, 0); // PA_SU_POINT_SIZE
    E32(ib, 0); // PA_SU_POINT_MINMAX
    E32(ib, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
    E32(ib, 0); // PA_SC_LINE_STIPPLE
    E32(ib, 0); // VGT_OUTPUT_PATH_CNTL
    E32(ib, 0); // VGT_HOS_CNTL
    E32(ib, 0); // VGT_HOS_MAX_TESS_LEVEL
    E32(ib, 0); // VGT_HOS_MIN_TESS_LEVEL
    E32(ib, 0); // VGT_HOS_REUSE_DEPTH
    E32(ib, 0); // VGT_GROUP_PRIM_TYPE
    E32(ib, 0); // VGT_GROUP_FIRST_DECR
    E32(ib, 0); // VGT_GROUP_DECR
    E32(ib, 0); // VGT_GROUP_VECT_0_CNTL
    E32(ib, 0); // VGT_GROUP_VECT_1_CNTL
    E32(ib, 0); // VGT_GROUP_VECT_0_FMT_CNTL
    E32(ib, 0); // VGT_GROUP_VECT_1_FMT_CNTL
    E32(ib, 0); // VGT_GS_MODE

    PACK0(ib, VGT_STRMOUT_EN, 3);
    E32(ib, 0); // VGT_STRMOUT_EN
    E32(ib, 0); // VGT_REUSE_OFF
    E32(ib, 0); // VGT_VTX_CNT_EN

    EREG(ib, VGT_STRMOUT_BUFFER_EN, 0);
    EREG(ib, SX_MISC, 0);
    END_BATCH();
}

/*
 * Commands
 */

void
r600_draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t i, count;

    // calculate num of packets
    count = 2;
    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
        count += (draw_conf->num_indices + 1) / 2;
    else
        count += draw_conf->num_indices;

    BEGIN_BATCH(8 + count);
    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
    PACK3(ib, IT_INDEX_TYPE, 1);
#if X_BYTE_ORDER == X_BIG_ENDIAN
    E32(ib, IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
#else
    E32(ib, draw_conf->index_type);
#endif
    PACK3(ib, IT_NUM_INSTANCES, 1);
    E32(ib, draw_conf->num_instances);

    PACK3(ib, IT_DRAW_INDEX_IMMD, count);
    E32(ib, draw_conf->num_indices);
    E32(ib, draw_conf->vgt_draw_initiator);

    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
        for (i = 0; i < draw_conf->num_indices; i += 2) {
            if ((i + 1) == draw_conf->num_indices)
                E32(ib, indices[i]);
            else
                E32(ib, (indices[i] | (indices[i + 1] << 16)));
        }
    } else {
        for (i = 0; i < draw_conf->num_indices; i++)
            E32(ib, indices[i]);
    }
    END_BATCH();
}

void
r600_draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(10);
    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
    PACK3(ib, IT_INDEX_TYPE, 1);
#if X_BYTE_ORDER == X_BIG_ENDIAN
    E32(ib, IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
#else
    E32(ib, draw_conf->index_type);
#endif
    PACK3(ib, IT_NUM_INSTANCES, 1);
    E32(ib, draw_conf->num_instances);
    PACK3(ib, IT_DRAW_INDEX_AUTO, 2);
    E32(ib, draw_conf->num_indices);
    E32(ib, draw_conf->vgt_draw_initiator);
    END_BATCH();
}

void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    draw_config_t draw_conf;
    vtx_resource_t vtx_res;

    if (accel_state->vbo.vb_start_op == -1)
        return;

    CLEAR (draw_conf);
    CLEAR (vtx_res);

    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
        R600IBDiscard(pScrn, accel_state->ib);
        return;
    }

    /* Vertex buffer setup */
    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
    vtx_res.id = SQ_VTX_RESOURCE_vs;
    vtx_res.vtx_size_dw = vtx_size / 4;
    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
    vtx_res.mem_req_size = 1;
    vtx_res.vb_addr = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
    vtx_res.bo = accel_state->vbo.vb_bo;
#if X_BYTE_ORDER == X_BIG_ENDIAN
    vtx_res.endian = SQ_ENDIAN_8IN32;
#endif
    r600_set_vtx_resource(pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);

    /* Draw */
    draw_conf.prim_type = DI_PT_RECTLIST;
    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
    draw_conf.num_instances = 1;
    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;

    r600_draw_auto(pScrn, accel_state->ib, &draw_conf);

    /* XXX drm should handle this in fence submit */
    r600_wait_3d_idle_clean(pScrn, accel_state->ib);

    /* sync dst surface */
    r600_cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
                             accel_state->dst_size, accel_state->dst_obj.offset,
                             accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);

    accel_state->vbo.vb_start_op = -1;
    accel_state->ib_reset_op = 0;

#if KMS_MULTI_OP
    if (!info->cs)
#endif
        R600CPFlushIndirect(pScrn, accel_state->ib);
}