r6xx_accel.c revision ad43ddac
1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: Alex Deucher <alexander.deucher@amd.com> 24 * Matthias Hopf <mhopf@suse.de> 25 */ 26#ifdef HAVE_CONFIG_H 27#include "config.h" 28#endif 29 30#include "xf86.h" 31 32#include <errno.h> 33 34#include "radeon.h" 35#include "r600_shader.h" 36#include "radeon_reg.h" 37#include "r600_reg.h" 38#include "r600_state.h" 39 40#include "radeon_drm.h" 41#include "radeon_vbo.h" 42 43/* we try and batch operations together under KMS - 44 but it doesn't work yet without misrendering */ 45#define KMS_MULTI_OP 1 46 47/* Flush the indirect buffer to the kernel for submission to the card */ 48void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) 49{ 50 RADEONInfoPtr info = RADEONPTR(pScrn); 51 drmBufPtr buffer = ib; 52 int start = 0; 53 drm_radeon_indirect_t indirect; 54 55#if defined(XF86DRM_MODE) 56 if (info->cs) { 57 radeon_cs_flush_indirect(pScrn); 58 return; 59 } 60#endif 61 62 if (!buffer) return; 63 64 //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", 65 // buffer->idx); 66 67 while (buffer->used & 0x3c){ 68 BEGIN_BATCH(1); 69 E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */ 70 END_BATCH(); 71 } 72 73 //ErrorF("buffer bytes: %d\n", buffer->used); 74 75 indirect.idx = buffer->idx; 76 indirect.start = start; 77 indirect.end = buffer->used; 78 indirect.discard = 1; 79 80 drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, 81 &indirect, sizeof(drm_radeon_indirect_t)); 82 83} 84 85void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) 86{ 87#if defined(XF86DRM_MODE) 88 int ret; 89 RADEONInfoPtr info = RADEONPTR(pScrn); 90 if (info->cs) { 91 if (info->accel_state->ib_reset_op) { 92 /* if we have data just reset the CS and ignore the operation */ 93 info->cs->cdw = info->accel_state->ib_reset_op; 94 info->accel_state->ib_reset_op = 0; 95 return; 96 } 97 if (info->accel_state->vb_ptr) { 98 info->accel_state->vb_ptr = NULL; 99 } 100 101 info->accel_state->vb_offset = 0; 102 info->accel_state->vb_start_op = -1; 103 104 if (CS_FULL(info->cs)) { 105 radeon_cs_flush_indirect(pScrn); 106 return; 107 } 108 radeon_cs_erase(info->cs); 109 ret = radeon_cs_space_check(info->cs); 110 if (ret) 111 ErrorF("space check failed in flush\n"); 112 if (info->dri2.enabled) { 113 info->accel_state->XInited3D = FALSE; 114 info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN; 115 } 116 } 117#endif 118 if (!ib) return; 119 120 ib->used = 0; 121 R600CPFlushIndirect(pScrn, ib); 122} 123 124void 125wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) 126{ 127 RADEONInfoPtr info = RADEONPTR(pScrn); 128 129 //flush caches, don't generate timestamp 130 BEGIN_BATCH(5); 131 PACK3(ib, IT_EVENT_WRITE, 1); 132 E32(ib, CACHE_FLUSH_AND_INV_EVENT); 133 // wait for 3D idle clean 134 EREG(ib, WAIT_UNTIL, (WAIT_3D_IDLE_bit | 135 WAIT_3D_IDLECLEAN_bit)); 136 END_BATCH(); 137} 138 139void 140wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) 141{ 142 RADEONInfoPtr info = RADEONPTR(pScrn); 143 144 BEGIN_BATCH(3); 145 EREG(ib, WAIT_UNTIL, WAIT_3D_IDLE_bit); 146 END_BATCH(); 147} 148 149void 150start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) 151{ 152 RADEONInfoPtr info = RADEONPTR(pScrn); 153 154 if (info->ChipFamily < CHIP_FAMILY_RV770) { 155 BEGIN_BATCH(5); 156 PACK3(ib, IT_START_3D_CMDBUF, 1); 157 E32(ib, 0); 158 } else 159 BEGIN_BATCH(3); 160 161 PACK3(ib, IT_CONTEXT_CONTROL, 2); 162 E32(ib, 0x80000000); 163 E32(ib, 0x80000000); 164 END_BATCH(); 165 166} 167 168/* 169 * Setup of functional groups 170 */ 171 172// asic stack/thread/gpr limits - need to query the drm 173static void 174sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) 175{ 176 uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 177 uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; 178 RADEONInfoPtr info = RADEONPTR(pScrn); 179 180 if ((info->ChipFamily == CHIP_FAMILY_RV610) || 181 (info->ChipFamily == CHIP_FAMILY_RV620) || 182 (info->ChipFamily == CHIP_FAMILY_RS780) || 183 (info->ChipFamily == CHIP_FAMILY_RS880) || 184 (info->ChipFamily == CHIP_FAMILY_RV710)) 185 sq_config = 0; // no VC 186 else 187 sq_config = VC_ENABLE_bit; 188 189 sq_config |= (DX9_CONSTS_bit | 190 ALU_INST_PREFER_VECTOR_bit | 191 (sq_conf->ps_prio << PS_PRIO_shift) | 192 (sq_conf->vs_prio << VS_PRIO_shift) | 193 (sq_conf->gs_prio << GS_PRIO_shift) | 194 (sq_conf->es_prio << ES_PRIO_shift)); 195 196 sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 197 (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 198 (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 199 sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 200 (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 201 202 sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 203 (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 204 (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 205 (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 206 207 sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 208 (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 209 210 sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 211 (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 212 213 BEGIN_BATCH(8); 214 PACK0(ib, SQ_CONFIG, 6); 215 E32(ib, sq_config); 216 E32(ib, sq_gpr_resource_mgmt_1); 217 E32(ib, sq_gpr_resource_mgmt_2); 218 E32(ib, sq_thread_resource_mgmt); 219 E32(ib, sq_stack_resource_mgmt_1); 220 E32(ib, sq_stack_resource_mgmt_2); 221 END_BATCH(); 222} 223 224void 225set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain) 226{ 227 uint32_t cb_color_info; 228 int pitch, slice, h; 229 RADEONInfoPtr info = RADEONPTR(pScrn); 230 231 cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 232 (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 233 (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 234 (cb_conf->number_type << NUMBER_TYPE_shift) | 235 (cb_conf->comp_swap << COMP_SWAP_shift) | 236 (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); 237 if (cb_conf->read_size) 238 cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; 239 if (cb_conf->blend_clamp) 240 cb_color_info |= BLEND_CLAMP_bit; 241 if (cb_conf->clear_color) 242 cb_color_info |= CLEAR_COLOR_bit; 243 if (cb_conf->blend_bypass) 244 cb_color_info |= BLEND_BYPASS_bit; 245 if (cb_conf->blend_float32) 246 cb_color_info |= BLEND_FLOAT32_bit; 247 if (cb_conf->simple_float) 248 cb_color_info |= SIMPLE_FLOAT_bit; 249 if (cb_conf->round_mode) 250 cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 251 if (cb_conf->tile_compact) 252 cb_color_info |= TILE_COMPACT_bit; 253 if (cb_conf->source_format) 254 cb_color_info |= SOURCE_FORMAT_bit; 255 256 pitch = (cb_conf->w / 8) - 1; 257 h = RADEON_ALIGN(cb_conf->h, 8); 258 slice = ((cb_conf->w * h) / 64) - 1; 259 260 BEGIN_BATCH(3 + 2); 261 EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); 262 RELOC_BATCH(cb_conf->bo, 0, domain); 263 END_BATCH(); 264 265 // rv6xx workaround 266 if ((info->ChipFamily > CHIP_FAMILY_R600) && 267 (info->ChipFamily < CHIP_FAMILY_RV770)) { 268 BEGIN_BATCH(2); 269 PACK3(ib, IT_SURFACE_BASE_UPDATE, 1); 270 E32(ib, (2 << cb_conf->id)); 271 END_BATCH(); 272 } 273 /* Set CMASK & TILE buffer to the offset of color buffer as 274 * we don't use those this shouldn't cause any issue and we 275 * then have a valid cmd stream 276 */ 277 BEGIN_BATCH(3 + 2); 278 EREG(ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 279 RELOC_BATCH(cb_conf->bo, 0, domain); 280 END_BATCH(); 281 BEGIN_BATCH(3 + 2); 282 EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 283 RELOC_BATCH(cb_conf->bo, 0, domain); 284 END_BATCH(); 285 BEGIN_BATCH(12); 286 // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib 287 EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | 288 (slice << SLICE_TILE_MAX_shift))); 289 EREG(ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | 290 (0 << SLICE_MAX_shift))); 291 EREG(ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); 292 EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | 293 (0 << FMASK_TILE_MAX_shift))); 294 END_BATCH(); 295} 296 297void 298cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr, 299 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) 300{ 301 RADEONInfoPtr info = RADEONPTR(pScrn); 302 uint32_t cp_coher_size; 303 if (size == 0xffffffff) 304 cp_coher_size = 0xffffffff; 305 else 306 cp_coher_size = ((size + 255) >> 8); 307 308 BEGIN_BATCH(5 + 2); 309 PACK3(ib, IT_SURFACE_SYNC, 4); 310 E32(ib, sync_type); 311 E32(ib, cp_coher_size); 312 E32(ib, (mc_addr >> 8)); 313 E32(ib, 10); /* poll interval */ 314 RELOC_BATCH(bo, rdomains, wdomain); 315 END_BATCH(); 316} 317 318/* inserts a wait for vline in the command stream */ 319void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, 320 xf86CrtcPtr crtc, int start, int stop) 321{ 322 RADEONInfoPtr info = RADEONPTR(pScrn); 323 uint32_t offset; 324 325 if (!crtc) 326 return; 327 328 if (stop < start) 329 return; 330 331 if (!crtc->enabled) 332 return; 333 334 if (info->cs) { 335 if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) 336 return; 337 } else { 338#ifdef USE_EXA 339 if (info->useEXA) 340 offset = exaGetPixmapOffset(pPix); 341 else 342#endif 343 offset = pPix->devPrivate.ptr - info->FB; 344 345 /* if drawing to front buffer */ 346 if (offset != 0) 347 return; 348 } 349 350 start = max(start, 0); 351 stop = min(stop, crtc->mode.VDisplay); 352 353 if (start > crtc->mode.VDisplay) 354 return; 355 356#if defined(XF86DRM_MODE) 357 if (info->cs) { 358 drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; 359 360 BEGIN_BATCH(11); 361 /* set the VLINE range */ 362 EREG(ib, AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */ 363 (start << AVIVO_D1MODE_VLINE_START_SHIFT) | 364 (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); 365 366 /* tell the CP to poll the VLINE state register */ 367 PACK3(ib, IT_WAIT_REG_MEM, 6); 368 E32(ib, IT_WAIT_REG | IT_WAIT_EQ); 369 E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS)); 370 E32(ib, 0); 371 E32(ib, 0); // Ref value 372 E32(ib, AVIVO_D1MODE_VLINE_STAT); // Mask 373 E32(ib, 10); // Wait interval 374 /* add crtc reloc */ 375 PACK3(ib, IT_NOP, 1); 376 E32(ib, drmmode_crtc->mode_crtc->crtc_id); 377 END_BATCH(); 378 } else 379#endif 380 { 381 RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; 382 383 BEGIN_BATCH(9); 384 /* set the VLINE range */ 385 EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset, 386 (start << AVIVO_D1MODE_VLINE_START_SHIFT) | 387 (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); 388 389 /* tell the CP to poll the VLINE state register */ 390 PACK3(ib, IT_WAIT_REG_MEM, 6); 391 E32(ib, IT_WAIT_REG | IT_WAIT_EQ); 392 E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset)); 393 E32(ib, 0); 394 E32(ib, 0); // Ref value 395 E32(ib, AVIVO_D1MODE_VLINE_STAT); // Mask 396 E32(ib, 10); // Wait interval 397 END_BATCH(); 398 } 399} 400 401void 402fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain) 403{ 404 RADEONInfoPtr info = RADEONPTR(pScrn); 405 uint32_t sq_pgm_resources; 406 407 sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 408 (fs_conf->stack_size << STACK_SIZE_shift)); 409 410 if (fs_conf->dx10_clamp) 411 sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; 412 413 BEGIN_BATCH(3 + 2); 414 EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 415 RELOC_BATCH(fs_conf->bo, domain, 0); 416 END_BATCH(); 417 418 BEGIN_BATCH(6); 419 EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources); 420 EREG(ib, SQ_PGM_CF_OFFSET_FS, 0); 421 END_BATCH(); 422} 423 424void 425vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain) 426{ 427 RADEONInfoPtr info = RADEONPTR(pScrn); 428 uint32_t sq_pgm_resources; 429 430 sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 431 (vs_conf->stack_size << STACK_SIZE_shift)); 432 433 if (vs_conf->dx10_clamp) 434 sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; 435 if (vs_conf->fetch_cache_lines) 436 sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 437 if (vs_conf->uncached_first_inst) 438 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 439 440 BEGIN_BATCH(3 + 2); 441 EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 442 RELOC_BATCH(vs_conf->bo, domain, 0); 443 END_BATCH(); 444 445 BEGIN_BATCH(6); 446 EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources); 447 EREG(ib, SQ_PGM_CF_OFFSET_VS, 0); 448 END_BATCH(); 449} 450 451void 452ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain) 453{ 454 RADEONInfoPtr info = RADEONPTR(pScrn); 455 uint32_t sq_pgm_resources; 456 457 sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 458 (ps_conf->stack_size << STACK_SIZE_shift)); 459 460 if (ps_conf->dx10_clamp) 461 sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; 462 if (ps_conf->fetch_cache_lines) 463 sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 464 if (ps_conf->uncached_first_inst) 465 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 466 if (ps_conf->clamp_consts) 467 sq_pgm_resources |= CLAMP_CONSTS_bit; 468 469 BEGIN_BATCH(3 + 2); 470 EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 471 RELOC_BATCH(ps_conf->bo, domain, 0); 472 END_BATCH(); 473 474 BEGIN_BATCH(9); 475 EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources); 476 EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode); 477 EREG(ib, SQ_PGM_CF_OFFSET_PS, 0); 478 END_BATCH(); 479} 480 481void 482set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf) 483{ 484 RADEONInfoPtr info = RADEONPTR(pScrn); 485 int i; 486 const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); 487 488 BEGIN_BATCH(2 + countreg); 489 PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); 490 for (i = 0; i < countreg; i++) 491 EFLOAT(ib, const_buf[i]); 492 END_BATCH(); 493} 494 495void 496set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) 497{ 498 RADEONInfoPtr info = RADEONPTR(pScrn); 499 /* bool register order is: ps, vs, gs; one register each 500 * 1 bits per bool; 32 bools each for ps, vs, gs. 501 */ 502 BEGIN_BATCH(3); 503 EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 504 END_BATCH(); 505} 506 507void 508set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain) 509{ 510 RADEONInfoPtr info = RADEONPTR(pScrn); 511 uint32_t sq_vtx_constant_word2; 512 513 sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 514 ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 515 (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 516 (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 517 (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 518 if (res->clamp_x) 519 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 520 521 if (res->format_comp_all) 522 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 523 524 if (res->srf_mode_all) 525 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 526 527 BEGIN_BATCH(9 + 2); 528 PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); 529 E32(ib, res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 530 E32(ib, (res->vtx_num_entries << 2) - 1); // 1: SIZE 531 E32(ib, sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 532 E32(ib, res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? 533 E32(ib, 0); // 4: n/a 534 E32(ib, 0); // 5: n/a 535 E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE 536 RELOC_BATCH(res->bo, domain, 0); 537 END_BATCH(); 538} 539 540void 541set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain) 542{ 543 RADEONInfoPtr info = RADEONPTR(pScrn); 544 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 545 uint32_t sq_tex_resource_word5, sq_tex_resource_word6; 546 547 sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | 548 (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); 549 550 if (tex_res->w) 551 sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) | 552 ((tex_res->w - 1) << TEX_WIDTH_shift)); 553 554 if (tex_res->tile_type) 555 sq_tex_resource_word0 |= TILE_TYPE_bit; 556 557 sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); 558 559 if (tex_res->h) 560 sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 561 if (tex_res->depth) 562 sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 563 564 sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 565 (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 566 (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 567 (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 568 (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 569 (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 570 (tex_res->request_size << REQUEST_SIZE_shift) | 571 (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 572 (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 573 (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 574 (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 575 (tex_res->base_level << BASE_LEVEL_shift)); 576 577 if (tex_res->srf_mode_all) 578 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 579 if (tex_res->force_degamma) 580 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 581 582 sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 583 (tex_res->base_array << BASE_ARRAY_shift) | 584 (tex_res->last_array << LAST_ARRAY_shift)); 585 586 sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | 587 (tex_res->perf_modulation << PERF_MODULATION_shift) | 588 (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); 589 590 if (tex_res->interlaced) 591 sq_tex_resource_word6 |= INTERLACED_bit; 592 593 BEGIN_BATCH(9 + 4); 594 PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); 595 E32(ib, sq_tex_resource_word0); 596 E32(ib, sq_tex_resource_word1); 597 E32(ib, ((tex_res->base) >> 8)); 598 E32(ib, ((tex_res->mip_base) >> 8)); 599 E32(ib, sq_tex_resource_word4); 600 E32(ib, sq_tex_resource_word5); 601 E32(ib, sq_tex_resource_word6); 602 RELOC_BATCH(tex_res->bo, domain, 0); 603 RELOC_BATCH(tex_res->mip_bo, domain, 0); 604 END_BATCH(); 605} 606 607void 608set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) 609{ 610 RADEONInfoPtr info = RADEONPTR(pScrn); 611 uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 612 613 sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 614 (s->clamp_y << CLAMP_Y_shift) | 615 (s->clamp_z << CLAMP_Z_shift) | 616 (s->xy_mag_filter << XY_MAG_FILTER_shift) | 617 (s->xy_min_filter << XY_MIN_FILTER_shift) | 618 (s->z_filter << Z_FILTER_shift) | 619 (s->mip_filter << MIP_FILTER_shift) | 620 (s->border_color << BORDER_COLOR_TYPE_shift) | 621 (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 622 (s->chroma_key << CHROMA_KEY_shift)); 623 if (s->point_sampling_clamp) 624 sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; 625 if (s->tex_array_override) 626 sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; 627 if (s->lod_uses_minor_axis) 628 sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; 629 630 sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | 631 (s->max_lod << MAX_LOD_shift) | 632 (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); 633 634 sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | 635 (s->perf_mip << PERF_MIP_shift) | 636 (s->perf_z << PERF_Z_shift)); 637 if (s->mc_coord_truncate) 638 sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 639 if (s->force_degamma) 640 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 641 if (s->high_precision_filter) 642 sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; 643 if (s->fetch_4) 644 sq_tex_sampler_word2 |= FETCH_4_bit; 645 if (s->sample_is_pcf) 646 sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; 647 if (s->type) 648 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 649 650 BEGIN_BATCH(5); 651 PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); 652 E32(ib, sq_tex_sampler_word0); 653 E32(ib, sq_tex_sampler_word1); 654 E32(ib, sq_tex_sampler_word2); 655 END_BATCH(); 656} 657 658//XXX deal with clip offsets in clip setup 659void 660set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) 661{ 662 RADEONInfoPtr info = RADEONPTR(pScrn); 663 664 BEGIN_BATCH(6); 665 EREG(ib, PA_SC_SCREEN_SCISSOR_TL, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 666 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 667 EREG(ib, PA_SC_SCREEN_SCISSOR_BR, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | 668 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 669 END_BATCH(); 670} 671 672void 673set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) 674{ 675 RADEONInfoPtr info = RADEONPTR(pScrn); 676 677 BEGIN_BATCH(6); 678 EREG(ib, PA_SC_VPORT_SCISSOR_0_TL + 679 id * PA_SC_VPORT_SCISSOR_0_TL_offset, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 680 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 681 WINDOW_OFFSET_DISABLE_bit)); 682 EREG(ib, PA_SC_VPORT_SCISSOR_0_BR + 683 id * PA_SC_VPORT_SCISSOR_0_BR_offset, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 684 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 685 END_BATCH(); 686} 687 688void 689set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) 690{ 691 RADEONInfoPtr info = RADEONPTR(pScrn); 692 693 BEGIN_BATCH(6); 694 EREG(ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 695 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 696 WINDOW_OFFSET_DISABLE_bit)); 697 EREG(ib, PA_SC_GENERIC_SCISSOR_BR, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 698 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 699 END_BATCH(); 700} 701 702void 703set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) 704{ 705 RADEONInfoPtr info = RADEONPTR(pScrn); 706 707 BEGIN_BATCH(6); 708 EREG(ib, PA_SC_WINDOW_SCISSOR_TL, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 709 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 710 WINDOW_OFFSET_DISABLE_bit)); 711 EREG(ib, PA_SC_WINDOW_SCISSOR_BR, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 712 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 713 END_BATCH(); 714} 715 716void 717set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) 718{ 719 RADEONInfoPtr info = RADEONPTR(pScrn); 720 721 BEGIN_BATCH(6); 722 EREG(ib, PA_SC_CLIPRECT_0_TL + 723 id * PA_SC_CLIPRECT_0_TL_offset, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 724 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 725 EREG(ib, PA_SC_CLIPRECT_0_BR + 726 id * PA_SC_CLIPRECT_0_BR_offset, ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | 727 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 728 END_BATCH(); 729} 730 731/* 732 * Setup of default state 733 */ 734 735void 736set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) 737{ 738 tex_resource_t tex_res; 739 shader_config_t fs_conf; 740 sq_config_t sq_conf; 741 int i; 742 RADEONInfoPtr info = RADEONPTR(pScrn); 743 struct radeon_accel_state *accel_state = info->accel_state; 744 745 if (accel_state->XInited3D) 746 return; 747 748 memset(&tex_res, 0, sizeof(tex_resource_t)); 749 memset(&fs_conf, 0, sizeof(shader_config_t)); 750 751 accel_state->XInited3D = TRUE; 752 753 start_3d(pScrn, accel_state->ib); 754 755 // ASIC specific setup, see drm 756 BEGIN_BATCH(15); 757 if (info->ChipFamily < CHIP_FAMILY_RV770) { 758 EREG(ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | 759 (28 << TD_FIFO_CREDIT_shift))); 760 EREG(ib, VC_ENHANCE, 0); 761 EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 762 EREG(ib, DB_DEBUG, 0x82000000); /* ? */ 763 EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 764 (16 << DEPTH_FLUSH_shift) | 765 (0 << FORCE_SUMMARIZE_shift) | 766 (4 << DEPTH_PENDING_FREE_shift) | 767 (16 << DEPTH_CACHELINE_FREE_shift) | 768 0)); 769 } else { 770 EREG(ib, TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) | 771 (28 << TD_FIFO_CREDIT_shift))); 772 EREG(ib, VC_ENHANCE, 0); 773 EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit); 774 EREG(ib, DB_DEBUG, 0); 775 EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 776 (16 << DEPTH_FLUSH_shift) | 777 (0 << FORCE_SUMMARIZE_shift) | 778 (4 << DEPTH_PENDING_FREE_shift) | 779 (4 << DEPTH_CACHELINE_FREE_shift) | 780 0)); 781 } 782 END_BATCH(); 783 784 // SQ 785 sq_conf.ps_prio = 0; 786 sq_conf.vs_prio = 1; 787 sq_conf.gs_prio = 2; 788 sq_conf.es_prio = 3; 789 // need to set stack/thread/gpr limits based on the asic 790 // for now just set them low enough so any card will work 791 // see r600_cp.c in the drm 792 switch (info->ChipFamily) { 793 case CHIP_FAMILY_R600: 794 sq_conf.num_ps_gprs = 192; 795 sq_conf.num_vs_gprs = 56; 796 sq_conf.num_temp_gprs = 4; 797 sq_conf.num_gs_gprs = 0; 798 sq_conf.num_es_gprs = 0; 799 sq_conf.num_ps_threads = 136; 800 sq_conf.num_vs_threads = 48; 801 sq_conf.num_gs_threads = 4; 802 sq_conf.num_es_threads = 4; 803 sq_conf.num_ps_stack_entries = 128; 804 sq_conf.num_vs_stack_entries = 128; 805 sq_conf.num_gs_stack_entries = 0; 806 sq_conf.num_es_stack_entries = 0; 807 break; 808 case CHIP_FAMILY_RV630: 809 case CHIP_FAMILY_RV635: 810 sq_conf.num_ps_gprs = 84; 811 sq_conf.num_vs_gprs = 36; 812 sq_conf.num_temp_gprs = 4; 813 sq_conf.num_gs_gprs = 0; 814 sq_conf.num_es_gprs = 0; 815 sq_conf.num_ps_threads = 144; 816 sq_conf.num_vs_threads = 40; 817 sq_conf.num_gs_threads = 4; 818 sq_conf.num_es_threads = 4; 819 sq_conf.num_ps_stack_entries = 40; 820 sq_conf.num_vs_stack_entries = 40; 821 sq_conf.num_gs_stack_entries = 32; 822 sq_conf.num_es_stack_entries = 16; 823 break; 824 case CHIP_FAMILY_RV610: 825 case CHIP_FAMILY_RV620: 826 case CHIP_FAMILY_RS780: 827 case CHIP_FAMILY_RS880: 828 default: 829 sq_conf.num_ps_gprs = 84; 830 sq_conf.num_vs_gprs = 36; 831 sq_conf.num_temp_gprs = 4; 832 sq_conf.num_gs_gprs = 0; 833 sq_conf.num_es_gprs = 0; 834 sq_conf.num_ps_threads = 136; 835 sq_conf.num_vs_threads = 48; 836 sq_conf.num_gs_threads = 4; 837 sq_conf.num_es_threads = 4; 838 sq_conf.num_ps_stack_entries = 40; 839 sq_conf.num_vs_stack_entries = 40; 840 sq_conf.num_gs_stack_entries = 32; 841 sq_conf.num_es_stack_entries = 16; 842 break; 843 case CHIP_FAMILY_RV670: 844 sq_conf.num_ps_gprs = 144; 845 sq_conf.num_vs_gprs = 40; 846 sq_conf.num_temp_gprs = 4; 847 sq_conf.num_gs_gprs = 0; 848 sq_conf.num_es_gprs = 0; 849 sq_conf.num_ps_threads = 136; 850 sq_conf.num_vs_threads = 48; 851 sq_conf.num_gs_threads = 4; 852 sq_conf.num_es_threads = 4; 853 sq_conf.num_ps_stack_entries = 40; 854 sq_conf.num_vs_stack_entries = 40; 855 sq_conf.num_gs_stack_entries = 32; 856 sq_conf.num_es_stack_entries = 16; 857 break; 858 case CHIP_FAMILY_RV770: 859 sq_conf.num_ps_gprs = 192; 860 sq_conf.num_vs_gprs = 56; 861 sq_conf.num_temp_gprs = 4; 862 sq_conf.num_gs_gprs = 0; 863 sq_conf.num_es_gprs = 0; 864 sq_conf.num_ps_threads = 188; 865 sq_conf.num_vs_threads = 60; 866 sq_conf.num_gs_threads = 0; 867 sq_conf.num_es_threads = 0; 868 sq_conf.num_ps_stack_entries = 256; 869 sq_conf.num_vs_stack_entries = 256; 870 sq_conf.num_gs_stack_entries = 0; 871 sq_conf.num_es_stack_entries = 0; 872 break; 873 case CHIP_FAMILY_RV730: 874 case CHIP_FAMILY_RV740: 875 sq_conf.num_ps_gprs = 84; 876 sq_conf.num_vs_gprs = 36; 877 sq_conf.num_temp_gprs = 4; 878 sq_conf.num_gs_gprs = 0; 879 sq_conf.num_es_gprs = 0; 880 sq_conf.num_ps_threads = 188; 881 sq_conf.num_vs_threads = 60; 882 sq_conf.num_gs_threads = 0; 883 sq_conf.num_es_threads = 0; 884 sq_conf.num_ps_stack_entries = 128; 885 sq_conf.num_vs_stack_entries = 128; 886 sq_conf.num_gs_stack_entries = 0; 887 sq_conf.num_es_stack_entries = 0; 888 break; 889 case CHIP_FAMILY_RV710: 890 sq_conf.num_ps_gprs = 192; 891 sq_conf.num_vs_gprs = 56; 892 sq_conf.num_temp_gprs = 4; 893 sq_conf.num_gs_gprs = 0; 894 sq_conf.num_es_gprs = 0; 895 sq_conf.num_ps_threads = 144; 896 sq_conf.num_vs_threads = 48; 897 sq_conf.num_gs_threads = 0; 898 sq_conf.num_es_threads = 0; 899 sq_conf.num_ps_stack_entries = 128; 900 sq_conf.num_vs_stack_entries = 128; 901 sq_conf.num_gs_stack_entries = 0; 902 sq_conf.num_es_stack_entries = 0; 903 break; 904 } 905 906 sq_setup(pScrn, ib, &sq_conf); 907 908 BEGIN_BATCH(59); 909 EREG(ib, SQ_VTX_BASE_VTX_LOC, 0); 910 EREG(ib, SQ_VTX_START_INST_LOC, 0); 911 912 PACK0(ib, SQ_ESGS_RING_ITEMSIZE, 9); 913 E32(ib, 0); // SQ_ESGS_RING_ITEMSIZE 914 E32(ib, 0); // SQ_GSVS_RING_ITEMSIZE 915 E32(ib, 0); // SQ_ESTMP_RING_ITEMSIZE 916 E32(ib, 0); // SQ_GSTMP_RING_ITEMSIZE 917 E32(ib, 0); // SQ_VSTMP_RING_ITEMSIZE 918 E32(ib, 0); // SQ_PSTMP_RING_ITEMSIZE 919 E32(ib, 0); // SQ_FBUF_RING_ITEMSIZE 920 E32(ib, 0); // SQ_REDUC_RING_ITEMSIZE 921 E32(ib, 0); // SQ_GS_VERT_ITEMSIZE 922 923 // DB 924 EREG(ib, DB_DEPTH_INFO, 0); 925 EREG(ib, DB_STENCIL_CLEAR, 0); 926 EREG(ib, DB_DEPTH_CLEAR, 0); 927 EREG(ib, DB_STENCILREFMASK, 0); 928 EREG(ib, DB_STENCILREFMASK_BF, 0); 929 EREG(ib, DB_DEPTH_CONTROL, 0); 930 EREG(ib, DB_RENDER_CONTROL, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); 931 if (info->ChipFamily < CHIP_FAMILY_RV770) 932 EREG(ib, DB_RENDER_OVERRIDE, FORCE_SHADER_Z_ORDER_bit); 933 else 934 EREG(ib, DB_RENDER_OVERRIDE, 0); 935 EREG(ib, DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | 936 (2 << ALPHA_TO_MASK_OFFSET1_shift) | 937 (2 << ALPHA_TO_MASK_OFFSET2_shift) | 938 (2 << ALPHA_TO_MASK_OFFSET3_shift))); 939 940 941 EREG(ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ 942 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 943 944 945 // SX 946 EREG(ib, SX_ALPHA_TEST_CONTROL, 0); 947 EREG(ib, SX_ALPHA_REF, 0); 948 949 // CB 950 PACK0(ib, CB_BLEND_RED, 4); 951 E32(ib, 0x00000000); 952 E32(ib, 0x00000000); 953 E32(ib, 0x00000000); 954 E32(ib, 0x00000000); 955 END_BATCH(); 956 957 if (info->ChipFamily < CHIP_FAMILY_RV770) { 958 BEGIN_BATCH(11); 959 PACK0(ib, CB_FOG_RED, 3); 960 E32(ib, 0x00000000); 961 E32(ib, 0x00000000); 962 E32(ib, 0x00000000); 963 PACK0(ib, CB_CLEAR_RED, 4); 964 EFLOAT(ib, 1.0); /* WTF? */ 965 EFLOAT(ib, 0.0); 966 EFLOAT(ib, 1.0); 967 EFLOAT(ib, 1.0); 968 END_BATCH(); 969 } 970 971 BEGIN_BATCH(18); 972 PACK0(ib, CB_CLRCMP_CONTROL, 4); 973 E32(ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC 974 E32(ib, 0); // CB_CLRCMP_SRC 975 E32(ib, 0); // CB_CLRCMP_DST 976 E32(ib, 0); // CB_CLRCMP_MSK 977 978 EREG(ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift)); 979 EREG(ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); 980 981 982 // SC 983 EREG(ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | 984 (0 << WINDOW_Y_OFFSET_shift))); 985 986 EREG(ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); 987 END_BATCH(); 988 989 /* clip boolean is set to always visible -> doesn't matter */ 990 for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) 991 set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192); 992 993 BEGIN_BATCH(3); 994 if (info->ChipFamily < CHIP_FAMILY_RV770) 995 EREG(ib, R7xx_PA_SC_EDGERULE, 0x00000000); 996 else 997 EREG(ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); 998 END_BATCH(); 999 1000 for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) { 1001 set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192); 1002 BEGIN_BATCH(4); 1003 PACK0(ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2); 1004 EFLOAT(ib, 0.0); 1005 EFLOAT(ib, 1.0); 1006 END_BATCH(); 1007 } 1008 1009 BEGIN_BATCH(15); 1010 if (info->ChipFamily < CHIP_FAMILY_RV770) 1011 EREG(ib, PA_SC_MODE_CNTL, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); 1012 else 1013 EREG(ib, PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | 1014 0x00500000)); /* ? */ 1015 1016 EREG(ib, PA_SU_SC_MODE_CNTL, (FACE_bit | 1017 (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | 1018 (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); 1019 1020 1021 EREG(ib, PA_SC_LINE_CNTL, 0); 1022 EREG(ib, PA_SC_AA_CONFIG, 0); 1023 EREG(ib, PA_SC_AA_MASK, 0xFFFFFFFF); 1024 END_BATCH(); 1025 1026 //XXX: double check this 1027 if (info->ChipFamily > CHIP_FAMILY_R600) { 1028 BEGIN_BATCH(6); 1029 EREG(ib, PA_SC_AA_SAMPLE_LOCS_MCTX, 0); 1030 EREG(ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M, 0); 1031 END_BATCH(); 1032 } 1033 1034 BEGIN_BATCH(83); 1035 EREG(ib, PA_SC_LINE_STIPPLE, 0); 1036 EREG(ib, PA_SC_MPASS_PS_CNTL, 0); 1037 1038 // CL 1039 PACK0(ib, PA_CL_VPORT_XSCALE_0, 6); 1040 EFLOAT(ib, 0.0f); // PA_CL_VPORT_XSCALE 1041 EFLOAT(ib, 0.0f); // PA_CL_VPORT_XOFFSET 1042 EFLOAT(ib, 0.0f); // PA_CL_VPORT_YSCALE 1043 EFLOAT(ib, 0.0f); // PA_CL_VPORT_YOFFSET 1044 EFLOAT(ib, 0.0f); // PA_CL_VPORT_ZSCALE 1045 EFLOAT(ib, 0.0f); // PA_CL_VPORT_ZOFFSET 1046 EREG(ib, PA_CL_VTE_CNTL, 0); 1047 EREG(ib, PA_CL_VS_OUT_CNTL, 0); 1048 EREG(ib, PA_CL_NANINF_CNTL, 0); 1049 PACK0(ib, PA_CL_GB_VERT_CLIP_ADJ, 4); 1050 EFLOAT(ib, 1.0); // PA_CL_GB_VERT_CLIP_ADJ 1051 EFLOAT(ib, 1.0); // PA_CL_GB_VERT_DISC_ADJ 1052 EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ 1053 EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ 1054 1055 /* Scissor / viewport */ 1056 EREG(ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); 1057 EREG(ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); 1058 1059 // SU 1060 EREG(ib, PA_SU_SC_MODE_CNTL, FACE_bit); 1061 EREG(ib, PA_SU_POINT_SIZE, 0); 1062 EREG(ib, PA_SU_POINT_MINMAX, 0); 1063 EREG(ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0); 1064 EREG(ib, PA_SU_POLY_OFFSET_BACK_SCALE, 0); 1065 EREG(ib, PA_SU_POLY_OFFSET_FRONT_SCALE, 0); 1066 EREG(ib, PA_SU_POLY_OFFSET_BACK_OFFSET, 0); 1067 EREG(ib, PA_SU_POLY_OFFSET_FRONT_OFFSET, 0); 1068 1069 EREG(ib, PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ 1070 EREG(ib, PA_SU_VTX_CNTL, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | 1071 (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */ 1072 EREG(ib, PA_SU_POLY_OFFSET_CLAMP, 0); 1073 1074 // SPI 1075 if (info->ChipFamily < CHIP_FAMILY_RV770) 1076 EREG(ib, R7xx_SPI_THREAD_GROUPING, 0); 1077 else 1078 EREG(ib, R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift)); 1079 1080 EREG(ib, SPI_INPUT_Z, 0); 1081 EREG(ib, SPI_FOG_CNTL, 0); 1082 EREG(ib, SPI_FOG_FUNC_SCALE, 0); 1083 EREG(ib, SPI_FOG_FUNC_BIAS, 0); 1084 END_BATCH(); 1085 1086 // clear FS 1087 fs_conf.bo = accel_state->shaders_bo; 1088 fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM); 1089 1090 // VGT 1091 BEGIN_BATCH(75); 1092 EREG(ib, VGT_MAX_VTX_INDX, 2048); /* XXX set to a reasonably large number of indices */ 1093 EREG(ib, VGT_MIN_VTX_INDX, 0); 1094 EREG(ib, VGT_INDX_OFFSET, 0); 1095 EREG(ib, VGT_INSTANCE_STEP_RATE_0, 0); 1096 EREG(ib, VGT_INSTANCE_STEP_RATE_1, 0); 1097 EREG(ib, VGT_MULTI_PRIM_IB_RESET_INDX, 0); 1098 EREG(ib, VGT_OUTPUT_PATH_CNTL, 0); 1099 EREG(ib, VGT_GS_MODE, 0); 1100 EREG(ib, VGT_HOS_CNTL, 0); 1101 EREG(ib, VGT_HOS_MAX_TESS_LEVEL, 0); 1102 EREG(ib, VGT_HOS_MIN_TESS_LEVEL, 0); 1103 EREG(ib, VGT_HOS_REUSE_DEPTH, 0); 1104 EREG(ib, VGT_GROUP_PRIM_TYPE, 0); 1105 EREG(ib, VGT_GROUP_FIRST_DECR, 0); 1106 EREG(ib, VGT_GROUP_DECR, 0); 1107 EREG(ib, VGT_GROUP_VECT_0_CNTL, 0); 1108 EREG(ib, VGT_GROUP_VECT_1_CNTL, 0); 1109 EREG(ib, VGT_GROUP_VECT_0_FMT_CNTL, 0); 1110 EREG(ib, VGT_GROUP_VECT_1_FMT_CNTL, 0); 1111 EREG(ib, VGT_PRIMITIVEID_EN, 0); 1112 EREG(ib, VGT_MULTI_PRIM_IB_RESET_EN, 0); 1113 EREG(ib, VGT_STRMOUT_EN, 0); 1114 EREG(ib, VGT_REUSE_OFF, 0); 1115 EREG(ib, VGT_VTX_CNT_EN, 0); 1116 EREG(ib, VGT_STRMOUT_BUFFER_EN, 0); 1117 END_BATCH(); 1118} 1119 1120 1121/* 1122 * Commands 1123 */ 1124 1125void 1126draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices) 1127{ 1128 RADEONInfoPtr info = RADEONPTR(pScrn); 1129 uint32_t i, count; 1130 1131 // calculate num of packets 1132 count = 2; 1133 if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) 1134 count += (draw_conf->num_indices + 1) / 2; 1135 else 1136 count += draw_conf->num_indices; 1137 1138 BEGIN_BATCH(8 + count); 1139 EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1140 PACK3(ib, IT_INDEX_TYPE, 1); 1141 E32(ib, draw_conf->index_type); 1142 PACK3(ib, IT_NUM_INSTANCES, 1); 1143 E32(ib, draw_conf->num_instances); 1144 1145 PACK3(ib, IT_DRAW_INDEX_IMMD, count); 1146 E32(ib, draw_conf->num_indices); 1147 E32(ib, draw_conf->vgt_draw_initiator); 1148 1149 if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) { 1150 for (i = 0; i < draw_conf->num_indices; i += 2) { 1151 if ((i + 1) == draw_conf->num_indices) 1152 E32(ib, indices[i]); 1153 else 1154 E32(ib, (indices[i] | (indices[i + 1] << 16))); 1155 } 1156 } else { 1157 for (i = 0; i < draw_conf->num_indices; i++) 1158 E32(ib, indices[i]); 1159 } 1160 END_BATCH(); 1161} 1162 1163void 1164draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) 1165{ 1166 RADEONInfoPtr info = RADEONPTR(pScrn); 1167 1168 BEGIN_BATCH(10); 1169 EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1170 PACK3(ib, IT_INDEX_TYPE, 1); 1171 E32(ib, draw_conf->index_type); 1172 PACK3(ib, IT_NUM_INSTANCES, 1); 1173 E32(ib, draw_conf->num_instances); 1174 PACK3(ib, IT_DRAW_INDEX_AUTO, 2); 1175 E32(ib, draw_conf->num_indices); 1176 E32(ib, draw_conf->vgt_draw_initiator); 1177 END_BATCH(); 1178} 1179 1180Bool 1181r600_vb_get(ScrnInfoPtr pScrn) 1182{ 1183 RADEONInfoPtr info = RADEONPTR(pScrn); 1184 struct radeon_accel_state *accel_state = info->accel_state; 1185 1186 accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + 1187 (accel_state->ib->idx*accel_state->ib->total)+ 1188 (accel_state->ib->total / 2); 1189 accel_state->vb_total = (accel_state->ib->total / 2); 1190 accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address + 1191 (accel_state->ib->total / 2)); 1192 accel_state->vb_offset = 0; 1193 return TRUE; 1194} 1195 1196void 1197r600_vb_discard(ScrnInfoPtr pScrn) 1198{ 1199 RADEONInfoPtr info = RADEONPTR(pScrn); 1200 1201 info->accel_state->vb_start_op = -1; 1202} 1203 1204 1205 1206int 1207r600_cp_start(ScrnInfoPtr pScrn) 1208{ 1209 RADEONInfoPtr info = RADEONPTR(pScrn); 1210 struct radeon_accel_state *accel_state = info->accel_state; 1211 1212#if defined(XF86DRM_MODE) 1213 if (info->cs) { 1214 if (CS_FULL(info->cs)) { 1215 radeon_cs_flush_indirect(pScrn); 1216 } 1217 accel_state->ib_reset_op = info->cs->cdw; 1218 accel_state->vb_start_op = accel_state->vb_offset; 1219 } else 1220#endif 1221 { 1222 accel_state->ib = RADEONCPGetBuffer(pScrn); 1223 if (!r600_vb_get(pScrn)) { 1224 return -1; 1225 } 1226 accel_state->vb_start_op = accel_state->vb_offset; 1227 } 1228 return 0; 1229} 1230 1231void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size) 1232{ 1233 RADEONInfoPtr info = RADEONPTR(pScrn); 1234 struct radeon_accel_state *accel_state = info->accel_state; 1235 draw_config_t draw_conf; 1236 vtx_resource_t vtx_res; 1237 1238 if (accel_state->vb_start_op == -1) 1239 return; 1240 1241 CLEAR (draw_conf); 1242 CLEAR (vtx_res); 1243 1244 if (accel_state->vb_offset == accel_state->vb_start_op) { 1245 R600IBDiscard(pScrn, accel_state->ib); 1246 r600_vb_discard(pScrn); 1247 return; 1248 } 1249 1250 /* flush vertex cache */ 1251 if ((info->ChipFamily == CHIP_FAMILY_RV610) || 1252 (info->ChipFamily == CHIP_FAMILY_RV620) || 1253 (info->ChipFamily == CHIP_FAMILY_RS780) || 1254 (info->ChipFamily == CHIP_FAMILY_RS880) || 1255 (info->ChipFamily == CHIP_FAMILY_RV710)) 1256 cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 1257 accel_state->vb_offset, accel_state->vb_mc_addr, 1258 accel_state->vb_bo, 1259 RADEON_GEM_DOMAIN_GTT, 0); 1260 else 1261 cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, 1262 accel_state->vb_offset, accel_state->vb_mc_addr, 1263 accel_state->vb_bo, 1264 RADEON_GEM_DOMAIN_GTT, 0); 1265 1266 /* Vertex buffer setup */ 1267 accel_state->vb_size = accel_state->vb_offset - accel_state->vb_start_op; 1268 vtx_res.id = SQ_VTX_RESOURCE_vs; 1269 vtx_res.vtx_size_dw = vtx_size / 4; 1270 vtx_res.vtx_num_entries = accel_state->vb_size / 4; 1271 vtx_res.mem_req_size = 1; 1272 vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; 1273 vtx_res.bo = accel_state->vb_bo; 1274 set_vtx_resource (pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT); 1275 1276 /* Draw */ 1277 draw_conf.prim_type = DI_PT_RECTLIST; 1278 draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 1279 draw_conf.num_instances = 1; 1280 draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 1281 draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 1282 1283 draw_auto(pScrn, accel_state->ib, &draw_conf); 1284 1285 /* XXX drm should handle this in fence submit */ 1286 wait_3d_idle_clean(pScrn, accel_state->ib); 1287 1288 /* sync dst surface */ 1289 cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 1290 accel_state->dst_size, accel_state->dst_obj.offset, 1291 accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); 1292 1293 accel_state->vb_start_op = -1; 1294 accel_state->ib_reset_op = 0; 1295 1296#if KMS_MULTI_OP 1297 if (!info->cs) 1298#endif 1299 R600CPFlushIndirect(pScrn, accel_state->ib); 1300} 1301 1302void r600_vb_no_space(ScrnInfoPtr pScrn, int vert_size) 1303{ 1304#ifdef XF86DRM_MODE 1305 RADEONInfoPtr info = RADEONPTR(pScrn); 1306 struct radeon_accel_state *accel_state = info->accel_state; 1307 1308 if (info->cs) { 1309 if (accel_state->vb_bo) { 1310 if (accel_state->vb_start_op != accel_state->vb_offset) { 1311 r600_finish_op(pScrn, vert_size); 1312 accel_state->ib_reset_op = info->cs->cdw; 1313 } 1314 1315 /* release the current VBO */ 1316 radeon_vbo_put(pScrn); 1317 } 1318 1319 /* get a new one */ 1320 radeon_vbo_get(pScrn); 1321 return; 1322 } 1323#endif 1324 1325 r600_finish_op(pScrn, vert_size); 1326 r600_cp_start(pScrn); 1327} 1328