evergreen_accel.c revision c4ae5be6
1/* 2 * Copyright 2010 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26#ifdef HAVE_CONFIG_H 27#include "config.h" 28#endif 29 30#ifdef XF86DRM_MODE 31 32#include "xf86.h" 33 34#include <errno.h> 35 36#include "radeon.h" 37#include "evergreen_shader.h" 38#include "radeon_reg.h" 39#include "evergreen_reg.h" 40#include "evergreen_state.h" 41 42#include "radeon_drm.h" 43#include "radeon_vbo.h" 44#include "radeon_exa_shared.h" 45 46static const uint32_t EVERGREEN_ROP[16] = { 47 RADEON_ROP3_ZERO, /* GXclear */ 48 RADEON_ROP3_DSa, /* Gxand */ 49 RADEON_ROP3_SDna, /* GXandReverse */ 50 RADEON_ROP3_S, /* GXcopy */ 51 RADEON_ROP3_DSna, /* GXandInverted */ 52 RADEON_ROP3_D, /* GXnoop */ 53 RADEON_ROP3_DSx, /* GXxor */ 54 RADEON_ROP3_DSo, /* GXor */ 55 RADEON_ROP3_DSon, /* GXnor */ 56 RADEON_ROP3_DSxn, /* GXequiv */ 57 RADEON_ROP3_Dn, /* GXinvert */ 58 RADEON_ROP3_SDno, /* GXorReverse */ 59 RADEON_ROP3_Sn, /* GXcopyInverted */ 60 RADEON_ROP3_DSno, /* GXorInverted */ 61 RADEON_ROP3_DSan, /* GXnand */ 62 RADEON_ROP3_ONE, /* GXset */ 63}; 64 65void 66evergreen_start_3d(ScrnInfoPtr pScrn) 67{ 68 RADEONInfoPtr info = RADEONPTR(pScrn); 69 70 BEGIN_BATCH(3); 71 PACK3(IT_CONTEXT_CONTROL, 2); 72 E32(0x80000000); 73 E32(0x80000000); 74 END_BATCH(); 75 76} 77 78/* 79 * Setup of functional groups 80 */ 81 82// asic stack/thread/gpr limits - need to query the drm 83static void 84evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf) 85{ 86 uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; 87 uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; 88 uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; 89 RADEONInfoPtr info = RADEONPTR(pScrn); 90 91 if ((info->ChipFamily == CHIP_FAMILY_CEDAR) || 92 (info->ChipFamily == CHIP_FAMILY_PALM) || 93 (info->ChipFamily == CHIP_FAMILY_CAICOS)) 94 sq_config = 0; 95 else 96 sq_config = VC_ENABLE_bit; 97 98 sq_config |= (EXPORT_SRC_C_bit | 99 (sq_conf->cs_prio << CS_PRIO_shift) | 100 (sq_conf->ls_prio << LS_PRIO_shift) | 101 (sq_conf->hs_prio << HS_PRIO_shift) | 102 (sq_conf->ps_prio << PS_PRIO_shift) | 103 (sq_conf->vs_prio << VS_PRIO_shift) | 104 (sq_conf->gs_prio << GS_PRIO_shift) | 105 (sq_conf->es_prio << ES_PRIO_shift)); 106 107 sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 108 (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 109 (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 110 sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 111 (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 112 sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) | 113 (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift)); 114 115 sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 116 (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 117 (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 118 (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 119 sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) | 120 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift)); 121 122 sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 123 (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 124 125 sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 126 (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 127 128 sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) | 129 (sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift)); 130 131 BEGIN_BATCH(16); 132 /* disable dyn gprs */ 133 EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 134 PACK0(SQ_CONFIG, 4); 135 E32(sq_config); 136 E32(sq_gpr_resource_mgmt_1); 137 E32(sq_gpr_resource_mgmt_2); 138 E32(sq_gpr_resource_mgmt_3); 139 PACK0(SQ_THREAD_RESOURCE_MGMT, 5); 140 E32(sq_thread_resource_mgmt); 141 E32(sq_thread_resource_mgmt_2); 142 E32(sq_stack_resource_mgmt_1); 143 E32(sq_stack_resource_mgmt_2); 144 E32(sq_stack_resource_mgmt_3); 145 END_BATCH(); 146} 147 148/* cayman has some minor differences in CB_COLOR*_INFO and _ATTRIB, but none that 149 * we use here. 150 */ 151void 152evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain) 153{ 154 uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim; 155 int pitch, slice, h; 156 RADEONInfoPtr info = RADEONPTR(pScrn); 157 158 cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 159 (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 160 (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 161 (cb_conf->number_type << NUMBER_TYPE_shift) | 162 (cb_conf->comp_swap << COMP_SWAP_shift) | 163 (cb_conf->source_format << SOURCE_FORMAT_shift) | 164 (cb_conf->resource_type << RESOURCE_TYPE_shift)); 165 if (cb_conf->blend_clamp) 166 cb_color_info |= BLEND_CLAMP_bit; 167 if (cb_conf->fast_clear) 168 cb_color_info |= FAST_CLEAR_bit; 169 if (cb_conf->compression) 170 cb_color_info |= COMPRESSION_bit; 171 if (cb_conf->blend_bypass) 172 cb_color_info |= BLEND_BYPASS_bit; 173 if (cb_conf->simple_float) 174 cb_color_info |= SIMPLE_FLOAT_bit; 175 if (cb_conf->round_mode) 176 cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 177 if (cb_conf->tile_compact) 178 cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit; 179 if (cb_conf->rat) 180 cb_color_info |= RAT_bit; 181 182 /* bit 4 needs to be set for linear and depth/stencil surfaces */ 183 if (cb_conf->non_disp_tiling) 184 cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit; 185 186 pitch = (cb_conf->w / 8) - 1; 187 h = RADEON_ALIGN(cb_conf->h, 8); 188 slice = ((cb_conf->w * h) / 64) - 1; 189 190 switch (cb_conf->resource_type) { 191 case BUFFER: 192 /* number of elements in the surface */ 193 cb_color_dim = pitch * slice; 194 break; 195 default: 196 /* w/h of the surface */ 197 cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) | 198 ((cb_conf->h - 1) << HEIGHT_MAX_shift)); 199 break; 200 } 201 202 BEGIN_BATCH(3 + 2); 203 EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8)); 204 RELOC_BATCH(cb_conf->bo, 0, domain); 205 END_BATCH(); 206 207 /* Set CMASK & FMASK buffer to the offset of color buffer as 208 * we don't use those this shouldn't cause any issue and we 209 * then have a valid cmd stream 210 */ 211 BEGIN_BATCH(3 + 2); 212 EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0 >> 8)); 213 RELOC_BATCH(cb_conf->bo, 0, domain); 214 END_BATCH(); 215 BEGIN_BATCH(3 + 2); 216 EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0 >> 8)); 217 RELOC_BATCH(cb_conf->bo, 0, domain); 218 END_BATCH(); 219 220 /* tiling config */ 221 BEGIN_BATCH(3 + 2); 222 EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib); 223 RELOC_BATCH(cb_conf->bo, 0, domain); 224 END_BATCH(); 225 BEGIN_BATCH(3 + 2); 226 EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info); 227 RELOC_BATCH(cb_conf->bo, 0, domain); 228 END_BATCH(); 229 230 BEGIN_BATCH(33); 231 EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch); 232 EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice); 233 EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0); 234 EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim); 235 EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0); 236 EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0); 237 PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4); 238 E32(0); 239 E32(0); 240 E32(0); 241 E32(0); 242 EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift)); 243 EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[cb_conf->rop] | 244 (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift))); 245 EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); 246 END_BATCH(); 247 248} 249 250static void 251evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type, 252 uint32_t size, uint64_t mc_addr, 253 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) 254{ 255 RADEONInfoPtr info = RADEONPTR(pScrn); 256 uint32_t cp_coher_size; 257 if (size == 0xffffffff) 258 cp_coher_size = 0xffffffff; 259 else 260 cp_coher_size = ((size + 255) >> 8); 261 262 BEGIN_BATCH(5 + 2); 263 PACK3(IT_SURFACE_SYNC, 4); 264 E32(sync_type); 265 E32(cp_coher_size); 266 E32((mc_addr >> 8)); 267 E32(10); /* poll interval */ 268 RELOC_BATCH(bo, rdomains, wdomain); 269 END_BATCH(); 270} 271 272/* inserts a wait for vline in the command stream */ 273void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, 274 xf86CrtcPtr crtc, int start, int stop) 275{ 276 RADEONInfoPtr info = RADEONPTR(pScrn); 277 drmmode_crtc_private_ptr drmmode_crtc; 278 uint32_t offset; 279 280 if (!crtc) 281 return; 282 283 drmmode_crtc = crtc->driver_private; 284 285 if (stop < start) 286 return; 287 288 if (!crtc->enabled) 289 return; 290 291 if (info->cs) { 292 if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) 293 return; 294 } else { 295#ifdef USE_EXA 296 if (info->useEXA) 297 offset = exaGetPixmapOffset(pPix); 298 else 299#endif 300 offset = pPix->devPrivate.ptr - info->FB; 301 302 /* if drawing to front buffer */ 303 if (offset != 0) 304 return; 305 } 306 307 start = max(start, 0); 308 stop = min(stop, crtc->mode.VDisplay); 309 310 if (start > crtc->mode.VDisplay) 311 return; 312 313 BEGIN_BATCH(11); 314 /* set the VLINE range */ 315 EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */ 316 (start << EVERGREEN_VLINE_START_SHIFT) | 317 (stop << EVERGREEN_VLINE_END_SHIFT)); 318 319 /* tell the CP to poll the VLINE state register */ 320 PACK3(IT_WAIT_REG_MEM, 6); 321 E32(IT_WAIT_REG | IT_WAIT_EQ); 322 E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS)); 323 E32(0); 324 E32(0); // Ref value 325 E32(EVERGREEN_VLINE_STAT); // Mask 326 E32(10); // Wait interval 327 /* add crtc reloc */ 328 PACK3(IT_NOP, 1); 329 E32(drmmode_crtc->mode_crtc->crtc_id); 330 END_BATCH(); 331} 332 333void 334evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp) 335{ 336 RADEONInfoPtr info = RADEONPTR(pScrn); 337 338 BEGIN_BATCH(8); 339 /* Interpolator setup */ 340 EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift)); 341 PACK0(SPI_PS_IN_CONTROL_0, 3); 342 E32(((num_interp << NUM_INTERP_shift) | 343 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0 344 E32(0); // SPI_PS_IN_CONTROL_1 345 E32(0); // SPI_INTERP_CONTROL_0 346 END_BATCH(); 347} 348 349void 350evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain) 351{ 352 RADEONInfoPtr info = RADEONPTR(pScrn); 353 uint32_t sq_pgm_resources; 354 355 sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 356 (fs_conf->stack_size << STACK_SIZE_shift)); 357 358 if (fs_conf->dx10_clamp) 359 sq_pgm_resources |= DX10_CLAMP_bit; 360 361 BEGIN_BATCH(3 + 2); 362 EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 363 RELOC_BATCH(fs_conf->bo, domain, 0); 364 END_BATCH(); 365 366 BEGIN_BATCH(3); 367 EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources); 368 END_BATCH(); 369} 370 371/* cayman has some minor differences in SQ_PGM_RESOUCES_VS and _RESOURCES_2_VS, 372 * but none that we use here. 373 */ 374void 375evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain) 376{ 377 RADEONInfoPtr info = RADEONPTR(pScrn); 378 uint32_t sq_pgm_resources, sq_pgm_resources_2; 379 380 sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 381 (vs_conf->stack_size << STACK_SIZE_shift)); 382 383 if (vs_conf->dx10_clamp) 384 sq_pgm_resources |= DX10_CLAMP_bit; 385 if (vs_conf->uncached_first_inst) 386 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 387 388 sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) | 389 (vs_conf->double_round << DOUBLE_ROUND_shift)); 390 391 if (vs_conf->allow_sdi) 392 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit; 393 if (vs_conf->allow_sd0) 394 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit; 395 if (vs_conf->allow_ddi) 396 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit; 397 if (vs_conf->allow_ddo) 398 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit; 399 400 /* flush SQ cache */ 401 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 402 vs_conf->shader_size, vs_conf->shader_addr, 403 vs_conf->bo, domain, 0); 404 405 BEGIN_BATCH(3 + 2); 406 EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 407 RELOC_BATCH(vs_conf->bo, domain, 0); 408 END_BATCH(); 409 410 BEGIN_BATCH(4); 411 PACK0(SQ_PGM_RESOURCES_VS, 2); 412 E32(sq_pgm_resources); 413 E32(sq_pgm_resources_2); 414 END_BATCH(); 415} 416 417/* cayman has some minor differences in SQ_PGM_RESOUCES_PS and _RESOURCES_2_PS, 418 * but none that we use here. 419 */ 420void 421evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain) 422{ 423 RADEONInfoPtr info = RADEONPTR(pScrn); 424 uint32_t sq_pgm_resources, sq_pgm_resources_2; 425 426 sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 427 (ps_conf->stack_size << STACK_SIZE_shift)); 428 429 if (ps_conf->dx10_clamp) 430 sq_pgm_resources |= DX10_CLAMP_bit; 431 if (ps_conf->uncached_first_inst) 432 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 433 if (ps_conf->clamp_consts) 434 sq_pgm_resources |= CLAMP_CONSTS_bit; 435 436 sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) | 437 (ps_conf->double_round << DOUBLE_ROUND_shift)); 438 439 if (ps_conf->allow_sdi) 440 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit; 441 if (ps_conf->allow_sd0) 442 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit; 443 if (ps_conf->allow_ddi) 444 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit; 445 if (ps_conf->allow_ddo) 446 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit; 447 448 /* flush SQ cache */ 449 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 450 ps_conf->shader_size, ps_conf->shader_addr, 451 ps_conf->bo, domain, 0); 452 453 BEGIN_BATCH(3 + 2); 454 EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 455 RELOC_BATCH(ps_conf->bo, domain, 0); 456 END_BATCH(); 457 458 BEGIN_BATCH(5); 459 PACK0(SQ_PGM_RESOURCES_PS, 3); 460 E32(sq_pgm_resources); 461 E32(sq_pgm_resources_2); 462 E32(ps_conf->export_mode); 463 END_BATCH(); 464} 465 466void 467evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain) 468{ 469 RADEONInfoPtr info = RADEONPTR(pScrn); 470 /* size reg is units of 16 consts (4 dwords each) */ 471 uint32_t size = const_conf->size_bytes >> 8; 472 473 if (size == 0) 474 size = 1; 475 476 /* flush SQ cache */ 477 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 478 const_conf->size_bytes, const_conf->const_addr, 479 const_conf->bo, domain, 0); 480 481 switch (const_conf->type) { 482 case SHADER_TYPE_VS: 483 BEGIN_BATCH(3); 484 EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size); 485 END_BATCH(); 486 BEGIN_BATCH(3 + 2); 487 EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8); 488 RELOC_BATCH(const_conf->bo, domain, 0); 489 END_BATCH(); 490 break; 491 case SHADER_TYPE_PS: 492 BEGIN_BATCH(3); 493 EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size); 494 END_BATCH(); 495 BEGIN_BATCH(3 + 2); 496 EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8); 497 RELOC_BATCH(const_conf->bo, domain, 0); 498 END_BATCH(); 499 break; 500 default: 501 ErrorF("Unsupported const type %d\n", const_conf->type); 502 break; 503 } 504 505} 506 507void 508evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val) 509{ 510 RADEONInfoPtr info = RADEONPTR(pScrn); 511 /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each 512 * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs. 513 */ 514 BEGIN_BATCH(3); 515 EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 516 END_BATCH(); 517} 518 519/* cayman has some minor differences in SQ_VTX_CONSTANT_WORD2_0 and _WORD3_0, 520 * but none that we use here. 521 */ 522static void 523evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain) 524{ 525 RADEONInfoPtr info = RADEONPTR(pScrn); 526 struct radeon_accel_state *accel_state = info->accel_state; 527 uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4; 528 529 sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 530 ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 531 (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 532 (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 533 (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 534 if (res->clamp_x) 535 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 536 537 if (res->format_comp_all) 538 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 539 540 if (res->srf_mode_all) 541 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 542 543 sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) | 544 (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) | 545 (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) | 546 (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift)); 547 548 if (res->uncached) 549 sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit; 550 551 /* XXX ??? */ 552 sq_vtx_constant_word4 = 0; 553 554 /* flush vertex cache */ 555 if ((info->ChipFamily == CHIP_FAMILY_CEDAR) || 556 (info->ChipFamily == CHIP_FAMILY_PALM) || 557 (info->ChipFamily == CHIP_FAMILY_CAICOS) || 558 (info->ChipFamily == CHIP_FAMILY_CAYMAN)) 559 evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 560 accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr, 561 res->bo, 562 domain, 0); 563 else 564 evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit, 565 accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr, 566 res->bo, 567 domain, 0); 568 569 BEGIN_BATCH(10 + 2); 570 PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8); 571 E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 572 E32((res->vtx_num_entries << 2) - 1); // 1: SIZE 573 E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 574 E32(sq_vtx_constant_word3); // 3: swizzles 575 E32(sq_vtx_constant_word4); // 4: num elements 576 E32(0); // 5: n/a 577 E32(0); // 6: n/a 578 E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift); // 7: TYPE 579 RELOC_BATCH(res->bo, domain, 0); 580 END_BATCH(); 581} 582 583/* cayman has some minor differences in SQ_TEX_CONSTANT_WORD0_0 and _WORD4_0, 584 * but none that we use here. 585 */ 586void 587evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain) 588{ 589 RADEONInfoPtr info = RADEONPTR(pScrn); 590 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 591 uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7; 592 593 sq_tex_resource_word0 = (tex_res->dim << DIM_shift); 594 595 if (tex_res->w) 596 sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) | 597 ((tex_res->w - 1) << TEX_WIDTH_shift)); 598 599 if (tex_res->tile_type) 600 sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit; 601 602 sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift); 603 604 if (tex_res->h) 605 sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 606 if (tex_res->depth) 607 sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 608 609 sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 610 (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 611 (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 612 (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 613 (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 614 (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 615 (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 616 (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 617 (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 618 (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 619 (tex_res->base_level << BASE_LEVEL_shift)); 620 621 if (tex_res->srf_mode_all) 622 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 623 if (tex_res->force_degamma) 624 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 625 626 sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 627 (tex_res->base_array << BASE_ARRAY_shift) | 628 (tex_res->last_array << LAST_ARRAY_shift)); 629 630 sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) | 631 (tex_res->perf_modulation << PERF_MODULATION_shift)); 632 633 if (tex_res->interlaced) 634 sq_tex_resource_word6 |= INTERLACED_bit; 635 636 sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) | 637 (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift)); 638 639 /* flush texture cache */ 640 evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 641 tex_res->size, tex_res->base, 642 tex_res->bo, domain, 0); 643 644 BEGIN_BATCH(10 + 4); 645 PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8); 646 E32(sq_tex_resource_word0); 647 E32(sq_tex_resource_word1); 648 E32(((tex_res->base) >> 8)); 649 E32(((tex_res->mip_base) >> 8)); 650 E32(sq_tex_resource_word4); 651 E32(sq_tex_resource_word5); 652 E32(sq_tex_resource_word6); 653 E32(sq_tex_resource_word7); 654 RELOC_BATCH(tex_res->bo, domain, 0); 655 RELOC_BATCH(tex_res->mip_bo, domain, 0); 656 END_BATCH(); 657} 658 659/* cayman has some minor differences in SQ_TEX_SAMPLER_WORD0_0, 660 * but none that we use here. 661 */ 662void 663evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s) 664{ 665 RADEONInfoPtr info = RADEONPTR(pScrn); 666 uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 667 668 sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 669 (s->clamp_y << CLAMP_Y_shift) | 670 (s->clamp_z << CLAMP_Z_shift) | 671 (s->xy_mag_filter << XY_MAG_FILTER_shift) | 672 (s->xy_min_filter << XY_MIN_FILTER_shift) | 673 (s->z_filter << Z_FILTER_shift) | 674 (s->mip_filter << MIP_FILTER_shift) | 675 (s->border_color << BORDER_COLOR_TYPE_shift) | 676 (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 677 (s->chroma_key << CHROMA_KEY_shift)); 678 679 sq_tex_sampler_word1 = ((s->min_lod << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift) | 680 (s->max_lod << MAX_LOD_shift) | 681 (s->perf_mip << PERF_MIP_shift) | 682 (s->perf_z << PERF_Z_shift)); 683 684 685 sq_tex_sampler_word2 = ((s->lod_bias << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) | 686 (s->lod_bias2 << LOD_BIAS_SEC_shift)); 687 688 if (s->mc_coord_truncate) 689 sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 690 if (s->force_degamma) 691 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 692 if (s->truncate_coord) 693 sq_tex_sampler_word2 |= TRUNCATE_COORD_bit; 694 if (s->disable_cube_wrap) 695 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit; 696 if (s->type) 697 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 698 699 BEGIN_BATCH(5); 700 PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); 701 E32(sq_tex_sampler_word0); 702 E32(sq_tex_sampler_word1); 703 E32(sq_tex_sampler_word2); 704 END_BATCH(); 705} 706 707/* workarounds for hw bugs in eg+ */ 708/* only affects screen/window/generic/vport. cliprects are not affected */ 709static void 710evergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn, int *x1, int *y1, int *x2, int *y2) 711{ 712 RADEONInfoPtr info = RADEONPTR(pScrn); 713 714 /* all eg+ asics */ 715 if (*x2 == 0) 716 *x1 = 1; 717 if (*y2 == 0) 718 *y1 = 1; 719 720 /* cayman only */ 721 if (info->ChipFamily == CHIP_FAMILY_CAYMAN) { 722 /* cliprects aren't affected so we can use them to clip if we need 723 * a true 1x1 clip region 724 */ 725 if ((*x2 == 1) && (*y2 == 1)) 726 *x2 = 2; 727 } 728} 729 730//XXX deal with clip offsets in clip setup 731void 732evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 733{ 734 RADEONInfoPtr info = RADEONPTR(pScrn); 735 736 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2); 737 738 BEGIN_BATCH(4); 739 PACK0(PA_SC_SCREEN_SCISSOR_TL, 2); 740 E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 741 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 742 E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | 743 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 744 END_BATCH(); 745} 746 747void 748evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 749{ 750 RADEONInfoPtr info = RADEONPTR(pScrn); 751 752 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2); 753 754 BEGIN_BATCH(4); 755 PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); 756 E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 757 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 758 WINDOW_OFFSET_DISABLE_bit)); 759 E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 760 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 761 END_BATCH(); 762} 763 764void 765evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 766{ 767 RADEONInfoPtr info = RADEONPTR(pScrn); 768 769 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2); 770 771 BEGIN_BATCH(4); 772 PACK0(PA_SC_GENERIC_SCISSOR_TL, 2); 773 E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 774 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 775 WINDOW_OFFSET_DISABLE_bit)); 776 E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 777 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 778 END_BATCH(); 779} 780 781void 782evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 783{ 784 RADEONInfoPtr info = RADEONPTR(pScrn); 785 786 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2); 787 788 BEGIN_BATCH(4); 789 PACK0(PA_SC_WINDOW_SCISSOR_TL, 2); 790 E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 791 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 792 WINDOW_OFFSET_DISABLE_bit)); 793 E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 794 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 795 END_BATCH(); 796} 797 798void 799evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 800{ 801 RADEONInfoPtr info = RADEONPTR(pScrn); 802 803 BEGIN_BATCH(4); 804 PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2); 805 E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 806 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 807 E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | 808 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 809 END_BATCH(); 810} 811 812/* 813 * Setup of default state 814 */ 815 816void 817evergreen_set_default_state(ScrnInfoPtr pScrn) 818{ 819 tex_resource_t tex_res; 820 shader_config_t fs_conf; 821 sq_config_t sq_conf; 822 int i; 823 RADEONInfoPtr info = RADEONPTR(pScrn); 824 struct radeon_accel_state *accel_state = info->accel_state; 825 826 if (info->ChipFamily == CHIP_FAMILY_CAYMAN) { 827 cayman_set_default_state(pScrn); 828 return; 829 } 830 831 if (accel_state->XInited3D) 832 return; 833 834 memset(&tex_res, 0, sizeof(tex_resource_t)); 835 memset(&fs_conf, 0, sizeof(shader_config_t)); 836 837 accel_state->XInited3D = TRUE; 838 839 evergreen_start_3d(pScrn); 840 841 /* SQ */ 842 sq_conf.ps_prio = 0; 843 sq_conf.vs_prio = 1; 844 sq_conf.gs_prio = 2; 845 sq_conf.es_prio = 3; 846 sq_conf.hs_prio = 0; 847 sq_conf.ls_prio = 0; 848 sq_conf.cs_prio = 0; 849 850 switch (info->ChipFamily) { 851 case CHIP_FAMILY_CEDAR: 852 default: 853 sq_conf.num_ps_gprs = 93; 854 sq_conf.num_vs_gprs = 46; 855 sq_conf.num_temp_gprs = 4; 856 sq_conf.num_gs_gprs = 31; 857 sq_conf.num_es_gprs = 31; 858 sq_conf.num_hs_gprs = 23; 859 sq_conf.num_ls_gprs = 23; 860 sq_conf.num_ps_threads = 96; 861 sq_conf.num_vs_threads = 16; 862 sq_conf.num_gs_threads = 16; 863 sq_conf.num_es_threads = 16; 864 sq_conf.num_hs_threads = 16; 865 sq_conf.num_ls_threads = 16; 866 sq_conf.num_ps_stack_entries = 42; 867 sq_conf.num_vs_stack_entries = 42; 868 sq_conf.num_gs_stack_entries = 42; 869 sq_conf.num_es_stack_entries = 42; 870 sq_conf.num_hs_stack_entries = 42; 871 sq_conf.num_ls_stack_entries = 42; 872 break; 873 case CHIP_FAMILY_REDWOOD: 874 sq_conf.num_ps_gprs = 93; 875 sq_conf.num_vs_gprs = 46; 876 sq_conf.num_temp_gprs = 4; 877 sq_conf.num_gs_gprs = 31; 878 sq_conf.num_es_gprs = 31; 879 sq_conf.num_hs_gprs = 23; 880 sq_conf.num_ls_gprs = 23; 881 sq_conf.num_ps_threads = 128; 882 sq_conf.num_vs_threads = 20; 883 sq_conf.num_gs_threads = 20; 884 sq_conf.num_es_threads = 20; 885 sq_conf.num_hs_threads = 20; 886 sq_conf.num_ls_threads = 20; 887 sq_conf.num_ps_stack_entries = 42; 888 sq_conf.num_vs_stack_entries = 42; 889 sq_conf.num_gs_stack_entries = 42; 890 sq_conf.num_es_stack_entries = 42; 891 sq_conf.num_hs_stack_entries = 42; 892 sq_conf.num_ls_stack_entries = 42; 893 break; 894 case CHIP_FAMILY_JUNIPER: 895 sq_conf.num_ps_gprs = 93; 896 sq_conf.num_vs_gprs = 46; 897 sq_conf.num_temp_gprs = 4; 898 sq_conf.num_gs_gprs = 31; 899 sq_conf.num_es_gprs = 31; 900 sq_conf.num_hs_gprs = 23; 901 sq_conf.num_ls_gprs = 23; 902 sq_conf.num_ps_threads = 128; 903 sq_conf.num_vs_threads = 20; 904 sq_conf.num_gs_threads = 20; 905 sq_conf.num_es_threads = 20; 906 sq_conf.num_hs_threads = 20; 907 sq_conf.num_ls_threads = 20; 908 sq_conf.num_ps_stack_entries = 85; 909 sq_conf.num_vs_stack_entries = 85; 910 sq_conf.num_gs_stack_entries = 85; 911 sq_conf.num_es_stack_entries = 85; 912 sq_conf.num_hs_stack_entries = 85; 913 sq_conf.num_ls_stack_entries = 85; 914 break; 915 case CHIP_FAMILY_CYPRESS: 916 case CHIP_FAMILY_HEMLOCK: 917 sq_conf.num_ps_gprs = 93; 918 sq_conf.num_vs_gprs = 46; 919 sq_conf.num_temp_gprs = 4; 920 sq_conf.num_gs_gprs = 31; 921 sq_conf.num_es_gprs = 31; 922 sq_conf.num_hs_gprs = 23; 923 sq_conf.num_ls_gprs = 23; 924 sq_conf.num_ps_threads = 128; 925 sq_conf.num_vs_threads = 20; 926 sq_conf.num_gs_threads = 20; 927 sq_conf.num_es_threads = 20; 928 sq_conf.num_hs_threads = 20; 929 sq_conf.num_ls_threads = 20; 930 sq_conf.num_ps_stack_entries = 85; 931 sq_conf.num_vs_stack_entries = 85; 932 sq_conf.num_gs_stack_entries = 85; 933 sq_conf.num_es_stack_entries = 85; 934 sq_conf.num_hs_stack_entries = 85; 935 sq_conf.num_ls_stack_entries = 85; 936 break; 937 case CHIP_FAMILY_PALM: 938 sq_conf.num_ps_gprs = 93; 939 sq_conf.num_vs_gprs = 46; 940 sq_conf.num_temp_gprs = 4; 941 sq_conf.num_gs_gprs = 31; 942 sq_conf.num_es_gprs = 31; 943 sq_conf.num_hs_gprs = 23; 944 sq_conf.num_ls_gprs = 23; 945 sq_conf.num_ps_threads = 96; 946 sq_conf.num_vs_threads = 16; 947 sq_conf.num_gs_threads = 16; 948 sq_conf.num_es_threads = 16; 949 sq_conf.num_hs_threads = 16; 950 sq_conf.num_ls_threads = 16; 951 sq_conf.num_ps_stack_entries = 42; 952 sq_conf.num_vs_stack_entries = 42; 953 sq_conf.num_gs_stack_entries = 42; 954 sq_conf.num_es_stack_entries = 42; 955 sq_conf.num_hs_stack_entries = 42; 956 sq_conf.num_ls_stack_entries = 42; 957 break; 958 case CHIP_FAMILY_BARTS: 959 sq_conf.num_ps_gprs = 93; 960 sq_conf.num_vs_gprs = 46; 961 sq_conf.num_temp_gprs = 4; 962 sq_conf.num_gs_gprs = 31; 963 sq_conf.num_es_gprs = 31; 964 sq_conf.num_hs_gprs = 23; 965 sq_conf.num_ls_gprs = 23; 966 sq_conf.num_ps_threads = 128; 967 sq_conf.num_vs_threads = 20; 968 sq_conf.num_gs_threads = 20; 969 sq_conf.num_es_threads = 20; 970 sq_conf.num_hs_threads = 20; 971 sq_conf.num_ls_threads = 20; 972 sq_conf.num_ps_stack_entries = 85; 973 sq_conf.num_vs_stack_entries = 85; 974 sq_conf.num_gs_stack_entries = 85; 975 sq_conf.num_es_stack_entries = 85; 976 sq_conf.num_hs_stack_entries = 85; 977 sq_conf.num_ls_stack_entries = 85; 978 break; 979 case CHIP_FAMILY_TURKS: 980 sq_conf.num_ps_gprs = 93; 981 sq_conf.num_vs_gprs = 46; 982 sq_conf.num_temp_gprs = 4; 983 sq_conf.num_gs_gprs = 31; 984 sq_conf.num_es_gprs = 31; 985 sq_conf.num_hs_gprs = 23; 986 sq_conf.num_ls_gprs = 23; 987 sq_conf.num_ps_threads = 128; 988 sq_conf.num_vs_threads = 20; 989 sq_conf.num_gs_threads = 20; 990 sq_conf.num_es_threads = 20; 991 sq_conf.num_hs_threads = 20; 992 sq_conf.num_ls_threads = 20; 993 sq_conf.num_ps_stack_entries = 42; 994 sq_conf.num_vs_stack_entries = 42; 995 sq_conf.num_gs_stack_entries = 42; 996 sq_conf.num_es_stack_entries = 42; 997 sq_conf.num_hs_stack_entries = 42; 998 sq_conf.num_ls_stack_entries = 42; 999 break; 1000 case CHIP_FAMILY_CAICOS: 1001 sq_conf.num_ps_gprs = 93; 1002 sq_conf.num_vs_gprs = 46; 1003 sq_conf.num_temp_gprs = 4; 1004 sq_conf.num_gs_gprs = 31; 1005 sq_conf.num_es_gprs = 31; 1006 sq_conf.num_hs_gprs = 23; 1007 sq_conf.num_ls_gprs = 23; 1008 sq_conf.num_ps_threads = 128; 1009 sq_conf.num_vs_threads = 10; 1010 sq_conf.num_gs_threads = 10; 1011 sq_conf.num_es_threads = 10; 1012 sq_conf.num_hs_threads = 10; 1013 sq_conf.num_ls_threads = 10; 1014 sq_conf.num_ps_stack_entries = 42; 1015 sq_conf.num_vs_stack_entries = 42; 1016 sq_conf.num_gs_stack_entries = 42; 1017 sq_conf.num_es_stack_entries = 42; 1018 sq_conf.num_hs_stack_entries = 42; 1019 sq_conf.num_ls_stack_entries = 42; 1020 break; 1021 } 1022 1023 evergreen_sq_setup(pScrn, &sq_conf); 1024 1025 BEGIN_BATCH(24); 1026 EREG(SQ_LDS_ALLOC_PS, 0); 1027 EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0); 1028 1029 PACK0(SQ_ESGS_RING_ITEMSIZE, 6); 1030 E32(0); 1031 E32(0); 1032 E32(0); 1033 E32(0); 1034 E32(0); 1035 E32(0); 1036 1037 PACK0(SQ_GS_VERT_ITEMSIZE, 4); 1038 E32(0); 1039 E32(0); 1040 E32(0); 1041 E32(0); 1042 1043 PACK0(SQ_VTX_BASE_VTX_LOC, 2); 1044 E32(0); 1045 E32(0); 1046 END_BATCH(); 1047 1048 /* DB */ 1049 BEGIN_BATCH(3 + 2); 1050 EREG(DB_Z_INFO, 0); 1051 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 1052 END_BATCH(); 1053 1054 BEGIN_BATCH(3 + 2); 1055 EREG(DB_STENCIL_INFO, 0); 1056 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 1057 END_BATCH(); 1058 1059 BEGIN_BATCH(3 + 2); 1060 EREG(DB_HTILE_DATA_BASE, 0); 1061 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 1062 END_BATCH(); 1063 1064 BEGIN_BATCH(49); 1065 EREG(DB_DEPTH_CONTROL, 0); 1066 1067 PACK0(PA_SC_VPORT_ZMIN_0, 2); 1068 EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0 1069 EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0 1070 1071 PACK0(DB_RENDER_CONTROL, 5); 1072 E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL 1073 E32(0); // DB_COUNT_CONTROL 1074 E32(0); // DB_DEPTH_VIEW 1075 E32(0x2a); // DB_RENDER_OVERRIDE 1076 E32(0); // DB_RENDER_OVERRIDE2 1077 1078 PACK0(DB_STENCIL_CLEAR, 2); 1079 E32(0); // DB_STENCIL_CLEAR 1080 E32(0); // DB_DEPTH_CLEAR 1081 1082 EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | 1083 (2 << ALPHA_TO_MASK_OFFSET1_shift) | 1084 (2 << ALPHA_TO_MASK_OFFSET2_shift) | 1085 (2 << ALPHA_TO_MASK_OFFSET3_shift))); 1086 1087 EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) | 1088 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 1089 1090 // SX 1091 EREG(SX_MISC, 0); 1092 1093 // CB 1094 PACK0(SX_ALPHA_TEST_CONTROL, 5); 1095 E32(0); // SX_ALPHA_TEST_CONTROL 1096 E32(0x00000000); //CB_BLEND_RED 1097 E32(0x00000000); //CB_BLEND_GREEN 1098 E32(0x00000000); //CB_BLEND_BLUE 1099 E32(0x00000000); //CB_BLEND_ALPHA 1100 1101 EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask); 1102 1103 // SC 1104 EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | 1105 (0 << WINDOW_Y_OFFSET_shift))); 1106 EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); 1107 EREG(PA_SC_EDGERULE, 0xAAAAAAAA); 1108 EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0); 1109 END_BATCH(); 1110 1111 /* clip boolean is set to always visible -> doesn't matter */ 1112 for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) 1113 evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192); 1114 1115 for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) 1116 evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192); 1117 1118 BEGIN_BATCH(57); 1119 PACK0(PA_SC_MODE_CNTL_0, 2); 1120 E32(0); // PA_SC_MODE_CNTL_0 1121 E32(0); // PA_SC_MODE_CNTL_1 1122 1123 PACK0(PA_SC_LINE_CNTL, 16); 1124 E32(0); // PA_SC_LINE_CNTL 1125 E32(0); // PA_SC_AA_CONFIG 1126 E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) | 1127 PIX_CENTER_bit)); // PA_SU_VTX_CNTL 1128 EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ 1129 EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ 1130 EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ 1131 EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ 1132 E32(0); // PA_SC_AA_SAMPLE_LOCS_0 1133 E32(0); 1134 E32(0); 1135 E32(0); 1136 E32(0); 1137 E32(0); 1138 E32(0); 1139 E32(0); // PA_SC_AA_SAMPLE_LOCS_7 1140 E32(0xFFFFFFFF); // PA_SC_AA_MASK 1141 1142 // CL 1143 PACK0(PA_CL_CLIP_CNTL, 8); 1144 E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL 1145 E32(FACE_bit); // PA_SU_SC_MODE_CNTL 1146 E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL 1147 E32(0); // PA_CL_VS_OUT_CNTL 1148 E32(0); // PA_CL_NANINF_CNTL 1149 E32(0); // PA_SU_LINE_STIPPLE_CNTL 1150 E32(0); // PA_SU_LINE_STIPPLE_SCALE 1151 E32(0); // PA_SU_PRIM_FILTER_CNTL 1152 1153 // SU 1154 PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); 1155 E32(0); 1156 E32(0); 1157 E32(0); 1158 E32(0); 1159 E32(0); 1160 E32(0); 1161 1162 /* src = semantic id 0; mask = semantic id 1 */ 1163 EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | 1164 (1 << SEMANTIC_1_shift))); 1165 PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2); 1166 /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ 1167 E32(((0 << SEMANTIC_shift) | 1168 (0x01 << DEFAULT_VAL_shift))); 1169 /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ 1170 E32(((1 << SEMANTIC_shift) | 1171 (0x01 << DEFAULT_VAL_shift))); 1172 1173 PACK0(SPI_INPUT_Z, 8); 1174 E32(0); // SPI_INPUT_Z 1175 E32(0); // SPI_FOG_CNTL 1176 E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL 1177 E32(0); // SPI_PS_IN_CONTROL_2 1178 E32(0); 1179 E32(0); 1180 E32(0); 1181 E32(0); 1182 END_BATCH(); 1183 1184 // clear FS 1185 fs_conf.bo = accel_state->shaders_bo; 1186 evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM); 1187 1188 // VGT 1189 BEGIN_BATCH(46); 1190 1191 PACK0(VGT_MAX_VTX_INDX, 4); 1192 E32(0xffffff); 1193 E32(0); 1194 E32(0); 1195 E32(0); 1196 1197 PACK0(VGT_INSTANCE_STEP_RATE_0, 2); 1198 E32(0); 1199 E32(0); 1200 1201 PACK0(VGT_REUSE_OFF, 2); 1202 E32(0); 1203 E32(0); 1204 1205 PACK0(PA_SU_POINT_SIZE, 17); 1206 E32(0); // PA_SU_POINT_SIZE 1207 E32(0); // PA_SU_POINT_MINMAX 1208 E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL 1209 E32(0); // PA_SC_LINE_STIPPLE 1210 E32(0); // VGT_OUTPUT_PATH_CNTL 1211 E32(0); // VGT_HOS_CNTL 1212 E32(0); 1213 E32(0); 1214 E32(0); 1215 E32(0); 1216 E32(0); 1217 E32(0); 1218 E32(0); 1219 E32(0); 1220 E32(0); 1221 E32(0); 1222 E32(0); // VGT_GS_MODE 1223 1224 EREG(VGT_PRIMITIVEID_EN, 0); 1225 EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0); 1226 EREG(VGT_SHADER_STAGES_EN, 0); 1227 1228 PACK0(VGT_STRMOUT_CONFIG, 2); 1229 E32(0); 1230 E32(0); 1231 END_BATCH(); 1232} 1233 1234 1235/* 1236 * Commands 1237 */ 1238 1239void 1240evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf) 1241{ 1242 RADEONInfoPtr info = RADEONPTR(pScrn); 1243 1244 BEGIN_BATCH(10); 1245 EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1246 PACK3(IT_INDEX_TYPE, 1); 1247#if X_BYTE_ORDER == X_BIG_ENDIAN 1248 E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1249#else 1250 E32(draw_conf->index_type); 1251#endif 1252 PACK3(IT_NUM_INSTANCES, 1); 1253 E32(draw_conf->num_instances); 1254 PACK3(IT_DRAW_INDEX_AUTO, 2); 1255 E32(draw_conf->num_indices); 1256 E32(draw_conf->vgt_draw_initiator); 1257 END_BATCH(); 1258} 1259 1260void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size) 1261{ 1262 RADEONInfoPtr info = RADEONPTR(pScrn); 1263 struct radeon_accel_state *accel_state = info->accel_state; 1264 draw_config_t draw_conf; 1265 vtx_resource_t vtx_res; 1266 1267 if (accel_state->vbo.vb_start_op == -1) 1268 return; 1269 1270 CLEAR (draw_conf); 1271 CLEAR (vtx_res); 1272 1273 if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) { 1274 radeon_ib_discard(pScrn); 1275 radeon_cs_flush_indirect(pScrn); 1276 return; 1277 } 1278 1279 /* Vertex buffer setup */ 1280 accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op; 1281 vtx_res.id = SQ_FETCH_RESOURCE_vs; 1282 vtx_res.vtx_size_dw = vtx_size / 4; 1283 vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; 1284 vtx_res.vb_addr = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op; 1285 vtx_res.bo = accel_state->vbo.vb_bo; 1286 vtx_res.dst_sel_x = SQ_SEL_X; 1287 vtx_res.dst_sel_y = SQ_SEL_Y; 1288 vtx_res.dst_sel_z = SQ_SEL_Z; 1289 vtx_res.dst_sel_w = SQ_SEL_W; 1290#if X_BYTE_ORDER == X_BIG_ENDIAN 1291 vtx_res.endian = SQ_ENDIAN_8IN32; 1292#endif 1293 evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT); 1294 1295 /* Draw */ 1296 draw_conf.prim_type = DI_PT_RECTLIST; 1297 draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 1298 draw_conf.num_instances = 1; 1299 draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 1300 draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 1301 1302 evergreen_draw_auto(pScrn, &draw_conf); 1303 1304 /* sync dst surface */ 1305 evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 1306 accel_state->dst_size, accel_state->dst_obj.offset, 1307 accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); 1308 1309 accel_state->vbo.vb_start_op = -1; 1310 accel_state->cbuf.vb_start_op = -1; 1311 accel_state->ib_reset_op = 0; 1312 1313} 1314 1315#endif 1316