evergreen_accel.c revision b13dfe66
1/* 2 * Copyright 2010 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26#ifdef HAVE_CONFIG_H 27#include "config.h" 28#endif 29 30#ifdef XF86DRM_MODE 31 32#include "xf86.h" 33 34#include <errno.h> 35 36#include "radeon.h" 37#include "evergreen_shader.h" 38#include "radeon_reg.h" 39#include "evergreen_reg.h" 40#include "evergreen_state.h" 41 42#include "radeon_drm.h" 43#include "radeon_vbo.h" 44#include "radeon_exa_shared.h" 45 46static const uint32_t EVERGREEN_ROP[16] = { 47 RADEON_ROP3_ZERO, /* GXclear */ 48 RADEON_ROP3_DSa, /* Gxand */ 49 RADEON_ROP3_SDna, /* GXandReverse */ 50 RADEON_ROP3_S, /* GXcopy */ 51 RADEON_ROP3_DSna, /* GXandInverted */ 52 RADEON_ROP3_D, /* GXnoop */ 53 RADEON_ROP3_DSx, /* GXxor */ 54 RADEON_ROP3_DSo, /* GXor */ 55 RADEON_ROP3_DSon, /* GXnor */ 56 RADEON_ROP3_DSxn, /* GXequiv */ 57 RADEON_ROP3_Dn, /* GXinvert */ 58 RADEON_ROP3_SDno, /* GXorReverse */ 59 RADEON_ROP3_Sn, /* GXcopyInverted */ 60 RADEON_ROP3_DSno, /* GXorInverted */ 61 RADEON_ROP3_DSan, /* GXnand */ 62 RADEON_ROP3_ONE, /* GXset */ 63}; 64 65void 66evergreen_start_3d(ScrnInfoPtr pScrn) 67{ 68 RADEONInfoPtr info = RADEONPTR(pScrn); 69 70 BEGIN_BATCH(3); 71 PACK3(IT_CONTEXT_CONTROL, 2); 72 E32(0x80000000); 73 E32(0x80000000); 74 END_BATCH(); 75 76} 77 78/* 79 * Setup of functional groups 80 */ 81 82// asic stack/thread/gpr limits - need to query the drm 83static void 84evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf) 85{ 86 uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; 87 uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; 88 uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; 89 RADEONInfoPtr info = RADEONPTR(pScrn); 90 91 if ((info->ChipFamily == CHIP_FAMILY_CEDAR) || 92 (info->ChipFamily == CHIP_FAMILY_PALM) || 93 (info->ChipFamily == CHIP_FAMILY_CAICOS)) 94 sq_config = 0; 95 else 96 sq_config = VC_ENABLE_bit; 97 98 sq_config |= (EXPORT_SRC_C_bit | 99 (sq_conf->cs_prio << CS_PRIO_shift) | 100 (sq_conf->ls_prio << LS_PRIO_shift) | 101 (sq_conf->hs_prio << HS_PRIO_shift) | 102 (sq_conf->ps_prio << PS_PRIO_shift) | 103 (sq_conf->vs_prio << VS_PRIO_shift) | 104 (sq_conf->gs_prio << GS_PRIO_shift) | 105 (sq_conf->es_prio << ES_PRIO_shift)); 106 107 sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 108 (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 109 (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 110 sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 111 (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 112 sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) | 113 (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift)); 114 115 sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 116 (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 117 (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 118 (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 119 sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) | 120 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift)); 121 122 sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 123 (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 124 125 sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 126 (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 127 128 sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) | 129 (sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift)); 130 131 BEGIN_BATCH(16); 132 /* disable dyn gprs */ 133 EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 134 PACK0(SQ_CONFIG, 4); 135 E32(sq_config); 136 E32(sq_gpr_resource_mgmt_1); 137 E32(sq_gpr_resource_mgmt_2); 138 E32(sq_gpr_resource_mgmt_3); 139 PACK0(SQ_THREAD_RESOURCE_MGMT, 5); 140 E32(sq_thread_resource_mgmt); 141 E32(sq_thread_resource_mgmt_2); 142 E32(sq_stack_resource_mgmt_1); 143 E32(sq_stack_resource_mgmt_2); 144 E32(sq_stack_resource_mgmt_3); 145 END_BATCH(); 146} 147 148void 149evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain) 150{ 151 uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim; 152 int pitch, slice, h; 153 RADEONInfoPtr info = RADEONPTR(pScrn); 154 155 cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 156 (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 157 (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 158 (cb_conf->number_type << NUMBER_TYPE_shift) | 159 (cb_conf->comp_swap << COMP_SWAP_shift) | 160 (cb_conf->source_format << SOURCE_FORMAT_shift) | 161 (cb_conf->resource_type << RESOURCE_TYPE_shift)); 162 if (cb_conf->blend_clamp) 163 cb_color_info |= BLEND_CLAMP_bit; 164 if (cb_conf->fast_clear) 165 cb_color_info |= FAST_CLEAR_bit; 166 if (cb_conf->compression) 167 cb_color_info |= COMPRESSION_bit; 168 if (cb_conf->blend_bypass) 169 cb_color_info |= BLEND_BYPASS_bit; 170 if (cb_conf->simple_float) 171 cb_color_info |= SIMPLE_FLOAT_bit; 172 if (cb_conf->round_mode) 173 cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 174 if (cb_conf->tile_compact) 175 cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit; 176 if (cb_conf->rat) 177 cb_color_info |= RAT_bit; 178 179 /* bit 4 needs to be set for linear and depth/stencil surfaces */ 180 if (cb_conf->non_disp_tiling) 181 cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit; 182 183 pitch = (cb_conf->w / 8) - 1; 184 h = RADEON_ALIGN(cb_conf->h, 8); 185 slice = ((cb_conf->w * h) / 64) - 1; 186 187 switch (cb_conf->resource_type) { 188 case BUFFER: 189 /* number of elements in the surface */ 190 cb_color_dim = pitch * slice; 191 break; 192 default: 193 /* w/h of the surface */ 194 cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) | 195 ((cb_conf->h - 1) << HEIGHT_MAX_shift)); 196 break; 197 } 198 199 BEGIN_BATCH(3 + 2); 200 EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8)); 201 RELOC_BATCH(cb_conf->bo, 0, domain); 202 END_BATCH(); 203 204 /* Set CMASK & FMASK buffer to the offset of color buffer as 205 * we don't use those this shouldn't cause any issue and we 206 * then have a valid cmd stream 207 */ 208 BEGIN_BATCH(3 + 2); 209 EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0 >> 8)); 210 RELOC_BATCH(cb_conf->bo, 0, domain); 211 END_BATCH(); 212 BEGIN_BATCH(3 + 2); 213 EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0 >> 8)); 214 RELOC_BATCH(cb_conf->bo, 0, domain); 215 END_BATCH(); 216 217 /* tiling config */ 218 BEGIN_BATCH(3 + 2); 219 EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib); 220 RELOC_BATCH(cb_conf->bo, 0, domain); 221 END_BATCH(); 222 BEGIN_BATCH(3 + 2); 223 EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info); 224 RELOC_BATCH(cb_conf->bo, 0, domain); 225 END_BATCH(); 226 227 BEGIN_BATCH(33); 228 EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch); 229 EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice); 230 EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0); 231 EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim); 232 EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0); 233 EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0); 234 PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4); 235 E32(0); 236 E32(0); 237 E32(0); 238 E32(0); 239 EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift)); 240 EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[cb_conf->rop] | 241 (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift))); 242 EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); 243 END_BATCH(); 244 245} 246 247static void 248evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type, 249 uint32_t size, uint64_t mc_addr, 250 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) 251{ 252 RADEONInfoPtr info = RADEONPTR(pScrn); 253 uint32_t cp_coher_size; 254 if (size == 0xffffffff) 255 cp_coher_size = 0xffffffff; 256 else 257 cp_coher_size = ((size + 255) >> 8); 258 259 BEGIN_BATCH(5 + 2); 260 PACK3(IT_SURFACE_SYNC, 4); 261 E32(sync_type); 262 E32(cp_coher_size); 263 E32((mc_addr >> 8)); 264 E32(10); /* poll interval */ 265 RELOC_BATCH(bo, rdomains, wdomain); 266 END_BATCH(); 267} 268 269/* inserts a wait for vline in the command stream */ 270void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, 271 xf86CrtcPtr crtc, int start, int stop) 272{ 273 RADEONInfoPtr info = RADEONPTR(pScrn); 274 drmmode_crtc_private_ptr drmmode_crtc; 275 uint32_t offset; 276 277 if (!crtc) 278 return; 279 280 drmmode_crtc = crtc->driver_private; 281 282 if (stop < start) 283 return; 284 285 if (!crtc->enabled) 286 return; 287 288 if (info->cs) { 289 if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) 290 return; 291 } else { 292#ifdef USE_EXA 293 if (info->useEXA) 294 offset = exaGetPixmapOffset(pPix); 295 else 296#endif 297 offset = pPix->devPrivate.ptr - info->FB; 298 299 /* if drawing to front buffer */ 300 if (offset != 0) 301 return; 302 } 303 304 start = max(start, 0); 305 stop = min(stop, crtc->mode.VDisplay); 306 307 if (start > crtc->mode.VDisplay) 308 return; 309 310 BEGIN_BATCH(11); 311 /* set the VLINE range */ 312 EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */ 313 (start << EVERGREEN_VLINE_START_SHIFT) | 314 (stop << EVERGREEN_VLINE_END_SHIFT)); 315 316 /* tell the CP to poll the VLINE state register */ 317 PACK3(IT_WAIT_REG_MEM, 6); 318 E32(IT_WAIT_REG | IT_WAIT_EQ); 319 E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS)); 320 E32(0); 321 E32(0); // Ref value 322 E32(EVERGREEN_VLINE_STAT); // Mask 323 E32(10); // Wait interval 324 /* add crtc reloc */ 325 PACK3(IT_NOP, 1); 326 E32(drmmode_crtc->mode_crtc->crtc_id); 327 END_BATCH(); 328} 329 330void 331evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp) 332{ 333 RADEONInfoPtr info = RADEONPTR(pScrn); 334 335 BEGIN_BATCH(8); 336 /* Interpolator setup */ 337 EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift)); 338 PACK0(SPI_PS_IN_CONTROL_0, 3); 339 E32(((num_interp << NUM_INTERP_shift) | 340 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0 341 E32(0); // SPI_PS_IN_CONTROL_1 342 E32(0); // SPI_INTERP_CONTROL_0 343 END_BATCH(); 344} 345 346void 347evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain) 348{ 349 RADEONInfoPtr info = RADEONPTR(pScrn); 350 uint32_t sq_pgm_resources; 351 352 sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 353 (fs_conf->stack_size << STACK_SIZE_shift)); 354 355 if (fs_conf->dx10_clamp) 356 sq_pgm_resources |= DX10_CLAMP_bit; 357 358 BEGIN_BATCH(3 + 2); 359 EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 360 RELOC_BATCH(fs_conf->bo, domain, 0); 361 END_BATCH(); 362 363 BEGIN_BATCH(3); 364 EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources); 365 END_BATCH(); 366} 367 368void 369evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain) 370{ 371 RADEONInfoPtr info = RADEONPTR(pScrn); 372 uint32_t sq_pgm_resources, sq_pgm_resources_2; 373 374 sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 375 (vs_conf->stack_size << STACK_SIZE_shift)); 376 377 if (vs_conf->dx10_clamp) 378 sq_pgm_resources |= DX10_CLAMP_bit; 379 if (vs_conf->uncached_first_inst) 380 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 381 382 sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) | 383 (vs_conf->double_round << DOUBLE_ROUND_shift)); 384 385 if (vs_conf->allow_sdi) 386 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit; 387 if (vs_conf->allow_sd0) 388 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit; 389 if (vs_conf->allow_ddi) 390 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit; 391 if (vs_conf->allow_ddo) 392 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit; 393 394 /* flush SQ cache */ 395 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 396 vs_conf->shader_size, vs_conf->shader_addr, 397 vs_conf->bo, domain, 0); 398 399 BEGIN_BATCH(3 + 2); 400 EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 401 RELOC_BATCH(vs_conf->bo, domain, 0); 402 END_BATCH(); 403 404 BEGIN_BATCH(4); 405 PACK0(SQ_PGM_RESOURCES_VS, 2); 406 E32(sq_pgm_resources); 407 E32(sq_pgm_resources_2); 408 END_BATCH(); 409} 410 411void 412evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain) 413{ 414 RADEONInfoPtr info = RADEONPTR(pScrn); 415 uint32_t sq_pgm_resources, sq_pgm_resources_2; 416 417 sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 418 (ps_conf->stack_size << STACK_SIZE_shift)); 419 420 if (ps_conf->dx10_clamp) 421 sq_pgm_resources |= DX10_CLAMP_bit; 422 if (ps_conf->uncached_first_inst) 423 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 424 if (ps_conf->clamp_consts) 425 sq_pgm_resources |= CLAMP_CONSTS_bit; 426 427 sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) | 428 (ps_conf->double_round << DOUBLE_ROUND_shift)); 429 430 if (ps_conf->allow_sdi) 431 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit; 432 if (ps_conf->allow_sd0) 433 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit; 434 if (ps_conf->allow_ddi) 435 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit; 436 if (ps_conf->allow_ddo) 437 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit; 438 439 /* flush SQ cache */ 440 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 441 ps_conf->shader_size, ps_conf->shader_addr, 442 ps_conf->bo, domain, 0); 443 444 BEGIN_BATCH(3 + 2); 445 EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 446 RELOC_BATCH(ps_conf->bo, domain, 0); 447 END_BATCH(); 448 449 BEGIN_BATCH(5); 450 PACK0(SQ_PGM_RESOURCES_PS, 3); 451 E32(sq_pgm_resources); 452 E32(sq_pgm_resources_2); 453 E32(ps_conf->export_mode); 454 END_BATCH(); 455} 456 457void 458evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain) 459{ 460 RADEONInfoPtr info = RADEONPTR(pScrn); 461 /* size reg is units of 16 consts (4 dwords each) */ 462 uint32_t size = const_conf->size_bytes >> 8; 463 464 if (size == 0) 465 size = 1; 466 467 /* flush SQ cache */ 468 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 469 const_conf->size_bytes, const_conf->const_addr, 470 const_conf->bo, domain, 0); 471 472 switch (const_conf->type) { 473 case SHADER_TYPE_VS: 474 BEGIN_BATCH(3); 475 EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size); 476 END_BATCH(); 477 BEGIN_BATCH(3 + 2); 478 EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8); 479 RELOC_BATCH(const_conf->bo, domain, 0); 480 END_BATCH(); 481 break; 482 case SHADER_TYPE_PS: 483 BEGIN_BATCH(3); 484 EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size); 485 END_BATCH(); 486 BEGIN_BATCH(3 + 2); 487 EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8); 488 RELOC_BATCH(const_conf->bo, domain, 0); 489 END_BATCH(); 490 break; 491 default: 492 ErrorF("Unsupported const type %d\n", const_conf->type); 493 break; 494 } 495 496} 497 498void 499evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val) 500{ 501 RADEONInfoPtr info = RADEONPTR(pScrn); 502 /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each 503 * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs. 504 */ 505 BEGIN_BATCH(3); 506 EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 507 END_BATCH(); 508} 509 510static void 511evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain) 512{ 513 RADEONInfoPtr info = RADEONPTR(pScrn); 514 struct radeon_accel_state *accel_state = info->accel_state; 515 uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4; 516 517 sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 518 ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 519 (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 520 (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 521 (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 522 if (res->clamp_x) 523 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 524 525 if (res->format_comp_all) 526 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 527 528 if (res->srf_mode_all) 529 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 530 531 sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) | 532 (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) | 533 (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) | 534 (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift)); 535 536 if (res->uncached) 537 sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit; 538 539 /* XXX ??? */ 540 sq_vtx_constant_word4 = 0; 541 542 /* flush vertex cache */ 543 if ((info->ChipFamily == CHIP_FAMILY_CEDAR) || 544 (info->ChipFamily == CHIP_FAMILY_PALM) || 545 (info->ChipFamily == CHIP_FAMILY_CAICOS)) 546 evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 547 accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr, 548 res->bo, 549 domain, 0); 550 else 551 evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit, 552 accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr, 553 res->bo, 554 domain, 0); 555 556 BEGIN_BATCH(10 + 2); 557 PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8); 558 E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 559 E32((res->vtx_num_entries << 2) - 1); // 1: SIZE 560 E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 561 E32(sq_vtx_constant_word3); // 3: swizzles 562 E32(sq_vtx_constant_word4); // 4: num elements 563 E32(0); // 5: n/a 564 E32(0); // 6: n/a 565 E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift); // 7: TYPE 566 RELOC_BATCH(res->bo, domain, 0); 567 END_BATCH(); 568} 569 570void 571evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain) 572{ 573 RADEONInfoPtr info = RADEONPTR(pScrn); 574 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 575 uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7; 576 577 sq_tex_resource_word0 = (tex_res->dim << DIM_shift); 578 579 if (tex_res->w) 580 sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) | 581 ((tex_res->w - 1) << TEX_WIDTH_shift)); 582 583 if (tex_res->tile_type) 584 sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit; 585 586 sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift); 587 588 if (tex_res->h) 589 sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 590 if (tex_res->depth) 591 sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 592 593 sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 594 (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 595 (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 596 (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 597 (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 598 (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 599 (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 600 (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 601 (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 602 (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 603 (tex_res->base_level << BASE_LEVEL_shift)); 604 605 if (tex_res->srf_mode_all) 606 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 607 if (tex_res->force_degamma) 608 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 609 610 sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 611 (tex_res->base_array << BASE_ARRAY_shift) | 612 (tex_res->last_array << LAST_ARRAY_shift)); 613 614 sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) | 615 (tex_res->perf_modulation << PERF_MODULATION_shift)); 616 617 if (tex_res->interlaced) 618 sq_tex_resource_word6 |= INTERLACED_bit; 619 620 sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) | 621 (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift)); 622 623 /* flush texture cache */ 624 evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 625 tex_res->size, tex_res->base, 626 tex_res->bo, domain, 0); 627 628 BEGIN_BATCH(10 + 4); 629 PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8); 630 E32(sq_tex_resource_word0); 631 E32(sq_tex_resource_word1); 632 E32(((tex_res->base) >> 8)); 633 E32(((tex_res->mip_base) >> 8)); 634 E32(sq_tex_resource_word4); 635 E32(sq_tex_resource_word5); 636 E32(sq_tex_resource_word6); 637 E32(sq_tex_resource_word7); 638 RELOC_BATCH(tex_res->bo, domain, 0); 639 RELOC_BATCH(tex_res->mip_bo, domain, 0); 640 END_BATCH(); 641} 642 643void 644evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s) 645{ 646 RADEONInfoPtr info = RADEONPTR(pScrn); 647 uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 648 649 sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 650 (s->clamp_y << CLAMP_Y_shift) | 651 (s->clamp_z << CLAMP_Z_shift) | 652 (s->xy_mag_filter << XY_MAG_FILTER_shift) | 653 (s->xy_min_filter << XY_MIN_FILTER_shift) | 654 (s->z_filter << Z_FILTER_shift) | 655 (s->mip_filter << MIP_FILTER_shift) | 656 (s->border_color << BORDER_COLOR_TYPE_shift) | 657 (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 658 (s->chroma_key << CHROMA_KEY_shift)); 659 660 sq_tex_sampler_word1 = ((s->min_lod << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift) | 661 (s->max_lod << MAX_LOD_shift) | 662 (s->perf_mip << PERF_MIP_shift) | 663 (s->perf_z << PERF_Z_shift)); 664 665 666 sq_tex_sampler_word2 = ((s->lod_bias << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) | 667 (s->lod_bias2 << LOD_BIAS_SEC_shift)); 668 669 if (s->mc_coord_truncate) 670 sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 671 if (s->force_degamma) 672 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 673 if (s->truncate_coord) 674 sq_tex_sampler_word2 |= TRUNCATE_COORD_bit; 675 if (s->disable_cube_wrap) 676 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit; 677 if (s->type) 678 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 679 680 BEGIN_BATCH(5); 681 PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); 682 E32(sq_tex_sampler_word0); 683 E32(sq_tex_sampler_word1); 684 E32(sq_tex_sampler_word2); 685 END_BATCH(); 686} 687 688//XXX deal with clip offsets in clip setup 689void 690evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 691{ 692 RADEONInfoPtr info = RADEONPTR(pScrn); 693 694 BEGIN_BATCH(4); 695 PACK0(PA_SC_SCREEN_SCISSOR_TL, 2); 696 E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 697 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 698 E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | 699 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 700 END_BATCH(); 701} 702 703void 704evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 705{ 706 RADEONInfoPtr info = RADEONPTR(pScrn); 707 708 BEGIN_BATCH(4); 709 PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); 710 E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 711 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 712 WINDOW_OFFSET_DISABLE_bit)); 713 E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 714 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 715 END_BATCH(); 716} 717 718void 719evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 720{ 721 RADEONInfoPtr info = RADEONPTR(pScrn); 722 723 BEGIN_BATCH(4); 724 PACK0(PA_SC_GENERIC_SCISSOR_TL, 2); 725 E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 726 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 727 WINDOW_OFFSET_DISABLE_bit)); 728 E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 729 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 730 END_BATCH(); 731} 732 733void 734evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 735{ 736 RADEONInfoPtr info = RADEONPTR(pScrn); 737 738 BEGIN_BATCH(4); 739 PACK0(PA_SC_WINDOW_SCISSOR_TL, 2); 740 E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 741 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 742 WINDOW_OFFSET_DISABLE_bit)); 743 E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 744 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 745 END_BATCH(); 746} 747 748void 749evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 750{ 751 RADEONInfoPtr info = RADEONPTR(pScrn); 752 753 BEGIN_BATCH(4); 754 PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2); 755 E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 756 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 757 E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | 758 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 759 END_BATCH(); 760} 761 762/* 763 * Setup of default state 764 */ 765 766void 767evergreen_set_default_state(ScrnInfoPtr pScrn) 768{ 769 tex_resource_t tex_res; 770 shader_config_t fs_conf; 771 sq_config_t sq_conf; 772 int i; 773 RADEONInfoPtr info = RADEONPTR(pScrn); 774 struct radeon_accel_state *accel_state = info->accel_state; 775 776 if (accel_state->XInited3D) 777 return; 778 779 memset(&tex_res, 0, sizeof(tex_resource_t)); 780 memset(&fs_conf, 0, sizeof(shader_config_t)); 781 782 accel_state->XInited3D = TRUE; 783 784 evergreen_start_3d(pScrn); 785 786 /* SQ */ 787 sq_conf.ps_prio = 0; 788 sq_conf.vs_prio = 1; 789 sq_conf.gs_prio = 2; 790 sq_conf.es_prio = 3; 791 sq_conf.hs_prio = 0; 792 sq_conf.ls_prio = 0; 793 sq_conf.cs_prio = 0; 794 795 switch (info->ChipFamily) { 796 case CHIP_FAMILY_CEDAR: 797 default: 798 sq_conf.num_ps_gprs = 93; 799 sq_conf.num_vs_gprs = 46; 800 sq_conf.num_temp_gprs = 4; 801 sq_conf.num_gs_gprs = 31; 802 sq_conf.num_es_gprs = 31; 803 sq_conf.num_hs_gprs = 23; 804 sq_conf.num_ls_gprs = 23; 805 sq_conf.num_ps_threads = 96; 806 sq_conf.num_vs_threads = 16; 807 sq_conf.num_gs_threads = 16; 808 sq_conf.num_es_threads = 16; 809 sq_conf.num_hs_threads = 16; 810 sq_conf.num_ls_threads = 16; 811 sq_conf.num_ps_stack_entries = 42; 812 sq_conf.num_vs_stack_entries = 42; 813 sq_conf.num_gs_stack_entries = 42; 814 sq_conf.num_es_stack_entries = 42; 815 sq_conf.num_hs_stack_entries = 42; 816 sq_conf.num_ls_stack_entries = 42; 817 break; 818 case CHIP_FAMILY_REDWOOD: 819 sq_conf.num_ps_gprs = 93; 820 sq_conf.num_vs_gprs = 46; 821 sq_conf.num_temp_gprs = 4; 822 sq_conf.num_gs_gprs = 31; 823 sq_conf.num_es_gprs = 31; 824 sq_conf.num_hs_gprs = 23; 825 sq_conf.num_ls_gprs = 23; 826 sq_conf.num_ps_threads = 128; 827 sq_conf.num_vs_threads = 20; 828 sq_conf.num_gs_threads = 20; 829 sq_conf.num_es_threads = 20; 830 sq_conf.num_hs_threads = 20; 831 sq_conf.num_ls_threads = 20; 832 sq_conf.num_ps_stack_entries = 42; 833 sq_conf.num_vs_stack_entries = 42; 834 sq_conf.num_gs_stack_entries = 42; 835 sq_conf.num_es_stack_entries = 42; 836 sq_conf.num_hs_stack_entries = 42; 837 sq_conf.num_ls_stack_entries = 42; 838 break; 839 case CHIP_FAMILY_JUNIPER: 840 sq_conf.num_ps_gprs = 93; 841 sq_conf.num_vs_gprs = 46; 842 sq_conf.num_temp_gprs = 4; 843 sq_conf.num_gs_gprs = 31; 844 sq_conf.num_es_gprs = 31; 845 sq_conf.num_hs_gprs = 23; 846 sq_conf.num_ls_gprs = 23; 847 sq_conf.num_ps_threads = 128; 848 sq_conf.num_vs_threads = 20; 849 sq_conf.num_gs_threads = 20; 850 sq_conf.num_es_threads = 20; 851 sq_conf.num_hs_threads = 20; 852 sq_conf.num_ls_threads = 20; 853 sq_conf.num_ps_stack_entries = 85; 854 sq_conf.num_vs_stack_entries = 85; 855 sq_conf.num_gs_stack_entries = 85; 856 sq_conf.num_es_stack_entries = 85; 857 sq_conf.num_hs_stack_entries = 85; 858 sq_conf.num_ls_stack_entries = 85; 859 break; 860 case CHIP_FAMILY_CYPRESS: 861 case CHIP_FAMILY_HEMLOCK: 862 sq_conf.num_ps_gprs = 93; 863 sq_conf.num_vs_gprs = 46; 864 sq_conf.num_temp_gprs = 4; 865 sq_conf.num_gs_gprs = 31; 866 sq_conf.num_es_gprs = 31; 867 sq_conf.num_hs_gprs = 23; 868 sq_conf.num_ls_gprs = 23; 869 sq_conf.num_ps_threads = 128; 870 sq_conf.num_vs_threads = 20; 871 sq_conf.num_gs_threads = 20; 872 sq_conf.num_es_threads = 20; 873 sq_conf.num_hs_threads = 20; 874 sq_conf.num_ls_threads = 20; 875 sq_conf.num_ps_stack_entries = 85; 876 sq_conf.num_vs_stack_entries = 85; 877 sq_conf.num_gs_stack_entries = 85; 878 sq_conf.num_es_stack_entries = 85; 879 sq_conf.num_hs_stack_entries = 85; 880 sq_conf.num_ls_stack_entries = 85; 881 break; 882 case CHIP_FAMILY_PALM: 883 sq_conf.num_ps_gprs = 93; 884 sq_conf.num_vs_gprs = 46; 885 sq_conf.num_temp_gprs = 4; 886 sq_conf.num_gs_gprs = 31; 887 sq_conf.num_es_gprs = 31; 888 sq_conf.num_hs_gprs = 23; 889 sq_conf.num_ls_gprs = 23; 890 sq_conf.num_ps_threads = 96; 891 sq_conf.num_vs_threads = 16; 892 sq_conf.num_gs_threads = 16; 893 sq_conf.num_es_threads = 16; 894 sq_conf.num_hs_threads = 16; 895 sq_conf.num_ls_threads = 16; 896 sq_conf.num_ps_stack_entries = 42; 897 sq_conf.num_vs_stack_entries = 42; 898 sq_conf.num_gs_stack_entries = 42; 899 sq_conf.num_es_stack_entries = 42; 900 sq_conf.num_hs_stack_entries = 42; 901 sq_conf.num_ls_stack_entries = 42; 902 break; 903 case CHIP_FAMILY_BARTS: 904 sq_conf.num_ps_gprs = 93; 905 sq_conf.num_vs_gprs = 46; 906 sq_conf.num_temp_gprs = 4; 907 sq_conf.num_gs_gprs = 31; 908 sq_conf.num_es_gprs = 31; 909 sq_conf.num_hs_gprs = 23; 910 sq_conf.num_ls_gprs = 23; 911 sq_conf.num_ps_threads = 128; 912 sq_conf.num_vs_threads = 20; 913 sq_conf.num_gs_threads = 20; 914 sq_conf.num_es_threads = 20; 915 sq_conf.num_hs_threads = 20; 916 sq_conf.num_ls_threads = 20; 917 sq_conf.num_ps_stack_entries = 85; 918 sq_conf.num_vs_stack_entries = 85; 919 sq_conf.num_gs_stack_entries = 85; 920 sq_conf.num_es_stack_entries = 85; 921 sq_conf.num_hs_stack_entries = 85; 922 sq_conf.num_ls_stack_entries = 85; 923 break; 924 case CHIP_FAMILY_TURKS: 925 sq_conf.num_ps_gprs = 93; 926 sq_conf.num_vs_gprs = 46; 927 sq_conf.num_temp_gprs = 4; 928 sq_conf.num_gs_gprs = 31; 929 sq_conf.num_es_gprs = 31; 930 sq_conf.num_hs_gprs = 23; 931 sq_conf.num_ls_gprs = 23; 932 sq_conf.num_ps_threads = 128; 933 sq_conf.num_vs_threads = 20; 934 sq_conf.num_gs_threads = 20; 935 sq_conf.num_es_threads = 20; 936 sq_conf.num_hs_threads = 20; 937 sq_conf.num_ls_threads = 20; 938 sq_conf.num_ps_stack_entries = 42; 939 sq_conf.num_vs_stack_entries = 42; 940 sq_conf.num_gs_stack_entries = 42; 941 sq_conf.num_es_stack_entries = 42; 942 sq_conf.num_hs_stack_entries = 42; 943 sq_conf.num_ls_stack_entries = 42; 944 break; 945 case CHIP_FAMILY_CAICOS: 946 sq_conf.num_ps_gprs = 93; 947 sq_conf.num_vs_gprs = 46; 948 sq_conf.num_temp_gprs = 4; 949 sq_conf.num_gs_gprs = 31; 950 sq_conf.num_es_gprs = 31; 951 sq_conf.num_hs_gprs = 23; 952 sq_conf.num_ls_gprs = 23; 953 sq_conf.num_ps_threads = 128; 954 sq_conf.num_vs_threads = 10; 955 sq_conf.num_gs_threads = 10; 956 sq_conf.num_es_threads = 10; 957 sq_conf.num_hs_threads = 10; 958 sq_conf.num_ls_threads = 10; 959 sq_conf.num_ps_stack_entries = 42; 960 sq_conf.num_vs_stack_entries = 42; 961 sq_conf.num_gs_stack_entries = 42; 962 sq_conf.num_es_stack_entries = 42; 963 sq_conf.num_hs_stack_entries = 42; 964 sq_conf.num_ls_stack_entries = 42; 965 break; 966 } 967 968 evergreen_sq_setup(pScrn, &sq_conf); 969 970 BEGIN_BATCH(24); 971 EREG(SQ_LDS_ALLOC_PS, 0); 972 EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0); 973 974 PACK0(SQ_ESGS_RING_ITEMSIZE, 6); 975 E32(0); 976 E32(0); 977 E32(0); 978 E32(0); 979 E32(0); 980 E32(0); 981 982 PACK0(SQ_GS_VERT_ITEMSIZE, 4); 983 E32(0); 984 E32(0); 985 E32(0); 986 E32(0); 987 988 PACK0(SQ_VTX_BASE_VTX_LOC, 2); 989 E32(0); 990 E32(0); 991 END_BATCH(); 992 993 /* DB */ 994 BEGIN_BATCH(3 + 2); 995 EREG(DB_Z_INFO, 0); 996 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 997 END_BATCH(); 998 999 BEGIN_BATCH(3 + 2); 1000 EREG(DB_STENCIL_INFO, 0); 1001 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 1002 END_BATCH(); 1003 1004 BEGIN_BATCH(3 + 2); 1005 EREG(DB_HTILE_DATA_BASE, 0); 1006 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 1007 END_BATCH(); 1008 1009 BEGIN_BATCH(49); 1010 EREG(DB_DEPTH_CONTROL, 0); 1011 1012 PACK0(PA_SC_VPORT_ZMIN_0, 2); 1013 EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0 1014 EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0 1015 1016 PACK0(DB_RENDER_CONTROL, 5); 1017 E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL 1018 E32(0); // DB_COUNT_CONTROL 1019 E32(0); // DB_DEPTH_VIEW 1020 E32(0x2a); // DB_RENDER_OVERRIDE 1021 E32(0); // DB_RENDER_OVERRIDE2 1022 1023 PACK0(DB_STENCIL_CLEAR, 2); 1024 E32(0); // DB_STENCIL_CLEAR 1025 E32(0); // DB_DEPTH_CLEAR 1026 1027 EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | 1028 (2 << ALPHA_TO_MASK_OFFSET1_shift) | 1029 (2 << ALPHA_TO_MASK_OFFSET2_shift) | 1030 (2 << ALPHA_TO_MASK_OFFSET3_shift))); 1031 1032 EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) | 1033 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 1034 1035 // SX 1036 EREG(SX_MISC, 0); 1037 1038 // CB 1039 PACK0(SX_ALPHA_TEST_CONTROL, 5); 1040 E32(0); // SX_ALPHA_TEST_CONTROL 1041 E32(0x00000000); //CB_BLEND_RED 1042 E32(0x00000000); //CB_BLEND_GREEN 1043 E32(0x00000000); //CB_BLEND_BLUE 1044 E32(0x00000000); //CB_BLEND_ALPHA 1045 1046 EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask); 1047 1048 // SC 1049 EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | 1050 (0 << WINDOW_Y_OFFSET_shift))); 1051 EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); 1052 EREG(PA_SC_EDGERULE, 0xAAAAAAAA); 1053 EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0); 1054 END_BATCH(); 1055 1056 /* clip boolean is set to always visible -> doesn't matter */ 1057 for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) 1058 evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192); 1059 1060 for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) 1061 evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192); 1062 1063 BEGIN_BATCH(57); 1064 PACK0(PA_SC_MODE_CNTL_0, 2); 1065 E32(0); // PA_SC_MODE_CNTL_0 1066 E32(0); // PA_SC_MODE_CNTL_1 1067 1068 PACK0(PA_SC_LINE_CNTL, 16); 1069 E32(0); // PA_SC_LINE_CNTL 1070 E32(0); // PA_SC_AA_CONFIG 1071 E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) | 1072 PIX_CENTER_bit)); // PA_SU_VTX_CNTL 1073 EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ 1074 EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ 1075 EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ 1076 EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ 1077 E32(0); // PA_SC_AA_SAMPLE_LOCS_0 1078 E32(0); 1079 E32(0); 1080 E32(0); 1081 E32(0); 1082 E32(0); 1083 E32(0); 1084 E32(0); // PA_SC_AA_SAMPLE_LOCS_7 1085 E32(0xFFFFFFFF); // PA_SC_AA_MASK 1086 1087 // CL 1088 PACK0(PA_CL_CLIP_CNTL, 8); 1089 E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL 1090 E32(FACE_bit); // PA_SU_SC_MODE_CNTL 1091 E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL 1092 E32(0); // PA_CL_VS_OUT_CNTL 1093 E32(0); // PA_CL_NANINF_CNTL 1094 E32(0); // PA_SU_LINE_STIPPLE_CNTL 1095 E32(0); // PA_SU_LINE_STIPPLE_SCALE 1096 E32(0); // PA_SU_PRIM_FILTER_CNTL 1097 1098 // SU 1099 PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); 1100 E32(0); 1101 E32(0); 1102 E32(0); 1103 E32(0); 1104 E32(0); 1105 E32(0); 1106 1107 /* src = semantic id 0; mask = semantic id 1 */ 1108 EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | 1109 (1 << SEMANTIC_1_shift))); 1110 PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2); 1111 /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ 1112 E32(((0 << SEMANTIC_shift) | 1113 (0x01 << DEFAULT_VAL_shift))); 1114 /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ 1115 E32(((1 << SEMANTIC_shift) | 1116 (0x01 << DEFAULT_VAL_shift))); 1117 1118 PACK0(SPI_INPUT_Z, 8); 1119 E32(0); // SPI_INPUT_Z 1120 E32(0); // SPI_FOG_CNTL 1121 E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL 1122 E32(0); // SPI_PS_IN_CONTROL_2 1123 E32(0); 1124 E32(0); 1125 E32(0); 1126 E32(0); 1127 END_BATCH(); 1128 1129 // clear FS 1130 fs_conf.bo = accel_state->shaders_bo; 1131 evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM); 1132 1133 // VGT 1134 BEGIN_BATCH(46); 1135 1136 PACK0(VGT_MAX_VTX_INDX, 4); 1137 E32(0xffffff); 1138 E32(0); 1139 E32(0); 1140 E32(0); 1141 1142 PACK0(VGT_INSTANCE_STEP_RATE_0, 2); 1143 E32(0); 1144 E32(0); 1145 1146 PACK0(VGT_REUSE_OFF, 2); 1147 E32(0); 1148 E32(0); 1149 1150 PACK0(PA_SU_POINT_SIZE, 17); 1151 E32(0); // PA_SU_POINT_SIZE 1152 E32(0); // PA_SU_POINT_MINMAX 1153 E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL 1154 E32(0); // PA_SC_LINE_STIPPLE 1155 E32(0); // VGT_OUTPUT_PATH_CNTL 1156 E32(0); // VGT_HOS_CNTL 1157 E32(0); 1158 E32(0); 1159 E32(0); 1160 E32(0); 1161 E32(0); 1162 E32(0); 1163 E32(0); 1164 E32(0); 1165 E32(0); 1166 E32(0); 1167 E32(0); // VGT_GS_MODE 1168 1169 EREG(VGT_PRIMITIVEID_EN, 0); 1170 EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0); 1171 EREG(VGT_SHADER_STAGES_EN, 0); 1172 1173 PACK0(VGT_STRMOUT_CONFIG, 2); 1174 E32(0); 1175 E32(0); 1176 END_BATCH(); 1177} 1178 1179 1180/* 1181 * Commands 1182 */ 1183 1184void 1185evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf) 1186{ 1187 RADEONInfoPtr info = RADEONPTR(pScrn); 1188 1189 BEGIN_BATCH(10); 1190 EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1191 PACK3(IT_INDEX_TYPE, 1); 1192#if X_BYTE_ORDER == X_BIG_ENDIAN 1193 E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1194#else 1195 E32(draw_conf->index_type); 1196#endif 1197 PACK3(IT_NUM_INSTANCES, 1); 1198 E32(draw_conf->num_instances); 1199 PACK3(IT_DRAW_INDEX_AUTO, 2); 1200 E32(draw_conf->num_indices); 1201 E32(draw_conf->vgt_draw_initiator); 1202 END_BATCH(); 1203} 1204 1205void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size) 1206{ 1207 RADEONInfoPtr info = RADEONPTR(pScrn); 1208 struct radeon_accel_state *accel_state = info->accel_state; 1209 draw_config_t draw_conf; 1210 vtx_resource_t vtx_res; 1211 1212 if (accel_state->vbo.vb_start_op == -1) 1213 return; 1214 1215 CLEAR (draw_conf); 1216 CLEAR (vtx_res); 1217 1218 if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) { 1219 radeon_ib_discard(pScrn); 1220 radeon_cs_flush_indirect(pScrn); 1221 return; 1222 } 1223 1224 /* Vertex buffer setup */ 1225 accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op; 1226 vtx_res.id = SQ_FETCH_RESOURCE_vs; 1227 vtx_res.vtx_size_dw = vtx_size / 4; 1228 vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; 1229 vtx_res.vb_addr = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op; 1230 vtx_res.bo = accel_state->vbo.vb_bo; 1231 vtx_res.dst_sel_x = SQ_SEL_X; 1232 vtx_res.dst_sel_y = SQ_SEL_Y; 1233 vtx_res.dst_sel_z = SQ_SEL_Z; 1234 vtx_res.dst_sel_w = SQ_SEL_W; 1235#if X_BYTE_ORDER == X_BIG_ENDIAN 1236 vtx_res.endian = SQ_ENDIAN_8IN32; 1237#endif 1238 evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT); 1239 1240 /* Draw */ 1241 draw_conf.prim_type = DI_PT_RECTLIST; 1242 draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 1243 draw_conf.num_instances = 1; 1244 draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 1245 draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 1246 1247 evergreen_draw_auto(pScrn, &draw_conf); 1248 1249 /* sync dst surface */ 1250 evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 1251 accel_state->dst_size, accel_state->dst_obj.offset, 1252 accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); 1253 1254 accel_state->vbo.vb_start_op = -1; 1255 accel_state->cbuf.vb_start_op = -1; 1256 accel_state->ib_reset_op = 0; 1257 1258} 1259 1260#endif 1261