evergreen_accel.c revision de2362d3
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22 * 23 * Authors: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26#ifdef HAVE_CONFIG_H 27#include "config.h" 28#endif 29 30#include "xf86.h" 31 32#include <errno.h> 33 34#include "radeon.h" 35#include "evergreen_shader.h" 36#include "radeon_reg.h" 37#include "evergreen_reg.h" 38#include "evergreen_state.h" 39 40#include "radeon_vbo.h" 41#include "radeon_exa_shared.h" 42 43static const uint32_t EVERGREEN_ROP[16] = { 44 RADEON_ROP3_ZERO, /* GXclear */ 45 RADEON_ROP3_DSa, /* Gxand */ 46 RADEON_ROP3_SDna, /* GXandReverse */ 47 RADEON_ROP3_S, /* GXcopy */ 48 RADEON_ROP3_DSna, /* GXandInverted */ 49 RADEON_ROP3_D, /* GXnoop */ 50 RADEON_ROP3_DSx, /* GXxor */ 51 RADEON_ROP3_DSo, /* GXor */ 52 RADEON_ROP3_DSon, /* GXnor */ 53 RADEON_ROP3_DSxn, /* GXequiv */ 54 RADEON_ROP3_Dn, /* GXinvert */ 55 RADEON_ROP3_SDno, /* GXorReverse */ 56 RADEON_ROP3_Sn, /* GXcopyInverted */ 57 RADEON_ROP3_DSno, /* GXorInverted */ 58 RADEON_ROP3_DSan, /* GXnand */ 59 RADEON_ROP3_ONE, /* GXset */ 60}; 61 62void 63evergreen_start_3d(ScrnInfoPtr pScrn) 64{ 65 RADEONInfoPtr info = RADEONPTR(pScrn); 66 67 BEGIN_BATCH(3); 68 PACK3(IT_CONTEXT_CONTROL, 2); 69 E32(0x80000000); 70 E32(0x80000000); 71 END_BATCH(); 72 73} 74 75unsigned eg_tile_split(unsigned tile_split) 76{ 77 switch (tile_split) { 78 case 64: tile_split = 0; break; 79 case 128: tile_split = 1; break; 80 case 256: tile_split = 2; break; 81 case 512: tile_split = 3; break; 82 case 1024: tile_split = 4; break; 83 case 2048: tile_split = 5; break; 84 default: 85 case 4096: tile_split = 6; break; 86 } 87 return tile_split; 88} 89 90static unsigned eg_macro_tile_aspect(unsigned macro_tile_aspect) 91{ 92 switch (macro_tile_aspect) { 93 default: 94 case 1: macro_tile_aspect = 0; break; 95 case 2: macro_tile_aspect = 1; break; 96 case 4: macro_tile_aspect = 2; break; 97 case 8: macro_tile_aspect = 3; break; 98 } 99 return macro_tile_aspect; 100} 101 102static unsigned eg_bank_wh(unsigned bankwh) 103{ 104 switch (bankwh) { 105 default: 106 
case 1: bankwh = 0; break; 107 case 2: bankwh = 1; break; 108 case 4: bankwh = 2; break; 109 case 8: bankwh = 3; break; 110 } 111 return bankwh; 112} 113 114static unsigned eg_nbanks(unsigned nbanks) 115{ 116 switch (nbanks) { 117 default: 118 case 2: nbanks = 0; break; 119 case 4: nbanks = 1; break; 120 case 8: nbanks = 2; break; 121 case 16: nbanks = 3; break; 122 } 123 return nbanks; 124} 125 126/* 127 * Setup of functional groups 128 */ 129 130// asic stack/thread/gpr limits - need to query the drm 131static void 132evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf) 133{ 134 uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; 135 uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; 136 uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; 137 RADEONInfoPtr info = RADEONPTR(pScrn); 138 139 if ((info->ChipFamily == CHIP_FAMILY_CEDAR) || 140 (info->ChipFamily == CHIP_FAMILY_PALM) || 141 (info->ChipFamily == CHIP_FAMILY_SUMO) || 142 (info->ChipFamily == CHIP_FAMILY_SUMO2) || 143 (info->ChipFamily == CHIP_FAMILY_CAICOS)) 144 sq_config = 0; 145 else 146 sq_config = VC_ENABLE_bit; 147 148 sq_config |= (EXPORT_SRC_C_bit | 149 (sq_conf->cs_prio << CS_PRIO_shift) | 150 (sq_conf->ls_prio << LS_PRIO_shift) | 151 (sq_conf->hs_prio << HS_PRIO_shift) | 152 (sq_conf->ps_prio << PS_PRIO_shift) | 153 (sq_conf->vs_prio << VS_PRIO_shift) | 154 (sq_conf->gs_prio << GS_PRIO_shift) | 155 (sq_conf->es_prio << ES_PRIO_shift)); 156 157 sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 158 (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 159 (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 160 sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 161 (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 162 sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) | 163 (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift)); 164 165 sq_thread_resource_mgmt = 
((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 166 (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 167 (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 168 (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 169 sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) | 170 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift)); 171 172 sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 173 (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 174 175 sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 176 (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 177 178 sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) | 179 (sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift)); 180 181 BEGIN_BATCH(16); 182 /* disable dyn gprs */ 183 EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 184 PACK0(SQ_CONFIG, 4); 185 E32(sq_config); 186 E32(sq_gpr_resource_mgmt_1); 187 E32(sq_gpr_resource_mgmt_2); 188 E32(sq_gpr_resource_mgmt_3); 189 PACK0(SQ_THREAD_RESOURCE_MGMT, 5); 190 E32(sq_thread_resource_mgmt); 191 E32(sq_thread_resource_mgmt_2); 192 E32(sq_stack_resource_mgmt_1); 193 E32(sq_stack_resource_mgmt_2); 194 E32(sq_stack_resource_mgmt_3); 195 END_BATCH(); 196} 197 198/* cayman has some minor differences in CB_COLOR*_INFO and _ATTRIB, but none that 199 * we use here. 
200 */ 201void 202evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain) 203{ 204 uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim; 205 unsigned pitch, slice, w, h, array_mode, nbanks; 206 uint32_t tile_split, macro_aspect, bankw, bankh; 207 RADEONInfoPtr info = RADEONPTR(pScrn); 208 209 if (cb_conf->surface) { 210 switch (cb_conf->surface->level[0].mode) { 211 case RADEON_SURF_MODE_1D: 212 array_mode = 2; 213 break; 214 case RADEON_SURF_MODE_2D: 215 array_mode = 4; 216 break; 217 default: 218 array_mode = 0; 219 break; 220 } 221 w = cb_conf->surface->level[0].npix_x; 222 h = cb_conf->surface->level[0].npix_y; 223 pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1; 224 slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1; 225 tile_split = cb_conf->surface->tile_split; 226 macro_aspect = cb_conf->surface->mtilea; 227 bankw = cb_conf->surface->bankw; 228 bankh = cb_conf->surface->bankh; 229 tile_split = eg_tile_split(tile_split); 230 macro_aspect = eg_macro_tile_aspect(macro_aspect); 231 bankw = eg_bank_wh(bankw); 232 bankh = eg_bank_wh(bankh); 233 } else { 234 pitch = (cb_conf->w / 8) - 1; 235 h = RADEON_ALIGN(cb_conf->h, 8); 236 slice = ((cb_conf->w * h) / 64) - 1; 237 array_mode = cb_conf->array_mode; 238 w = cb_conf->w; 239 tile_split = 4; 240 macro_aspect = 0; 241 bankw = 0; 242 bankh = 0; 243 } 244 nbanks = info->num_banks; 245 nbanks = eg_nbanks(nbanks); 246 247 cb_color_attrib |= (tile_split << CB_COLOR0_ATTRIB__TILE_SPLIT_shift)| 248 (nbanks << CB_COLOR0_ATTRIB__NUM_BANKS_shift) | 249 (bankw << CB_COLOR0_ATTRIB__BANK_WIDTH_shift) | 250 (bankh << CB_COLOR0_ATTRIB__BANK_HEIGHT_shift) | 251 (macro_aspect << CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift); 252 cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 253 (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 254 (array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 255 (cb_conf->number_type << NUMBER_TYPE_shift) | 256 
(cb_conf->comp_swap << COMP_SWAP_shift) | 257 (cb_conf->source_format << SOURCE_FORMAT_shift) | 258 (cb_conf->resource_type << RESOURCE_TYPE_shift)); 259 if (cb_conf->blend_clamp) 260 cb_color_info |= BLEND_CLAMP_bit; 261 if (cb_conf->fast_clear) 262 cb_color_info |= FAST_CLEAR_bit; 263 if (cb_conf->compression) 264 cb_color_info |= COMPRESSION_bit; 265 if (cb_conf->blend_bypass) 266 cb_color_info |= BLEND_BYPASS_bit; 267 if (cb_conf->simple_float) 268 cb_color_info |= SIMPLE_FLOAT_bit; 269 if (cb_conf->round_mode) 270 cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 271 if (cb_conf->tile_compact) 272 cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit; 273 if (cb_conf->rat) 274 cb_color_info |= RAT_bit; 275 276 /* bit 4 needs to be set for linear and depth/stencil surfaces */ 277 if (cb_conf->non_disp_tiling) 278 cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit; 279 280 switch (cb_conf->resource_type) { 281 case BUFFER: 282 /* number of elements in the surface */ 283 cb_color_dim = pitch * slice; 284 break; 285 default: 286 /* w/h of the surface */ 287 cb_color_dim = (((w - 1) << WIDTH_MAX_shift) | 288 ((cb_conf->h - 1) << HEIGHT_MAX_shift)); 289 break; 290 } 291 292 BEGIN_BATCH(3 + 2); 293 EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8)); 294 RELOC_BATCH(cb_conf->bo, 0, domain); 295 END_BATCH(); 296 297 /* Set CMASK & FMASK buffer to the offset of color buffer as 298 * we don't use those this shouldn't cause any issue and we 299 * then have a valid cmd stream 300 */ 301 BEGIN_BATCH(3 + 2); 302 EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0 >> 8)); 303 RELOC_BATCH(cb_conf->bo, 0, domain); 304 END_BATCH(); 305 BEGIN_BATCH(3 + 2); 306 EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0 >> 8)); 307 RELOC_BATCH(cb_conf->bo, 0, domain); 308 END_BATCH(); 309 310 /* tiling config */ 311 BEGIN_BATCH(3 + 2); 312 EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib); 313 RELOC_BATCH(cb_conf->bo, 0, domain); 314 END_BATCH(); 315 
BEGIN_BATCH(3 + 2); 316 EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info); 317 RELOC_BATCH(cb_conf->bo, 0, domain); 318 END_BATCH(); 319 320 BEGIN_BATCH(33); 321 EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch); 322 EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice); 323 EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0); 324 EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim); 325 EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0); 326 EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0); 327 PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4); 328 E32(0); 329 E32(0); 330 E32(0); 331 E32(0); 332 EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift)); 333 EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[cb_conf->rop] | 334 (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift))); 335 EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); 336 END_BATCH(); 337} 338 339void evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color) 340{ 341 RADEONInfoPtr info = RADEONPTR(pScrn); 342 343 BEGIN_BATCH(2 + 4); 344 PACK0(CB_BLEND_RED, 4); 345 EFLOAT(color[0]); /* R */ 346 EFLOAT(color[1]); /* G */ 347 EFLOAT(color[2]); /* B */ 348 EFLOAT(color[3]); /* A */ 349 END_BATCH(); 350} 351 352static void 353evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type, 354 uint32_t size, uint64_t mc_addr, 355 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) 356{ 357 RADEONInfoPtr info = RADEONPTR(pScrn); 358 uint32_t cp_coher_size; 359 if (size == 0xffffffff) 360 cp_coher_size = 0xffffffff; 361 else 362 cp_coher_size = ((size + 255) >> 8); 363 364 BEGIN_BATCH(5 + 2); 365 PACK3(IT_SURFACE_SYNC, 4); 366 E32(sync_type); 367 E32(cp_coher_size); 368 E32((mc_addr >> 8)); 369 E32(10); /* poll interval */ 370 RELOC_BATCH(bo, rdomains, wdomain); 371 END_BATCH(); 372} 373 374/* inserts a wait for vline in the command stream */ 375void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, 376 xf86CrtcPtr crtc, int start, int stop) 377{ 378 RADEONInfoPtr info = 
RADEONPTR(pScrn); 379 drmmode_crtc_private_ptr drmmode_crtc; 380 381 if (!crtc) 382 return; 383 384 drmmode_crtc = crtc->driver_private; 385 386 if (!crtc->enabled) 387 return; 388 389 if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) 390 return; 391 392 start = max(start, crtc->y); 393 stop = min(stop, crtc->y + crtc->mode.VDisplay); 394 395 if (start >= stop) 396 return; 397 398 BEGIN_BATCH(11); 399 /* set the VLINE range */ 400 EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */ 401 (start << EVERGREEN_VLINE_START_SHIFT) | 402 (stop << EVERGREEN_VLINE_END_SHIFT)); 403 404 /* tell the CP to poll the VLINE state register */ 405 PACK3(IT_WAIT_REG_MEM, 6); 406 E32(IT_WAIT_REG | IT_WAIT_EQ); 407 E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS)); 408 E32(0); 409 E32(0); // Ref value 410 E32(EVERGREEN_VLINE_STAT); // Mask 411 E32(10); // Wait interval 412 /* add crtc reloc */ 413 PACK3(IT_NOP, 1); 414 E32(drmmode_crtc->mode_crtc->crtc_id); 415 END_BATCH(); 416} 417 418void 419evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp) 420{ 421 RADEONInfoPtr info = RADEONPTR(pScrn); 422 423 BEGIN_BATCH(8); 424 /* Interpolator setup */ 425 EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift)); 426 PACK0(SPI_PS_IN_CONTROL_0, 3); 427 E32(((num_interp << NUM_INTERP_shift) | 428 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0 429 E32(0); // SPI_PS_IN_CONTROL_1 430 E32(0); // SPI_INTERP_CONTROL_0 431 END_BATCH(); 432} 433 434void 435evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain) 436{ 437 RADEONInfoPtr info = RADEONPTR(pScrn); 438 uint32_t sq_pgm_resources; 439 440 sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 441 (fs_conf->stack_size << STACK_SIZE_shift)); 442 443 if (fs_conf->dx10_clamp) 444 sq_pgm_resources |= DX10_CLAMP_bit; 445 446 BEGIN_BATCH(3 + 2); 447 EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 448 RELOC_BATCH(fs_conf->bo, domain, 0); 449 END_BATCH(); 450 451 BEGIN_BATCH(3); 
452 EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources); 453 END_BATCH(); 454} 455 456/* cayman has some minor differences in SQ_PGM_RESOUCES_VS and _RESOURCES_2_VS, 457 * but none that we use here. 458 */ 459void 460evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain) 461{ 462 RADEONInfoPtr info = RADEONPTR(pScrn); 463 uint32_t sq_pgm_resources, sq_pgm_resources_2; 464 465 sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 466 (vs_conf->stack_size << STACK_SIZE_shift)); 467 468 if (vs_conf->dx10_clamp) 469 sq_pgm_resources |= DX10_CLAMP_bit; 470 if (vs_conf->uncached_first_inst) 471 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 472 473 sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) | 474 (vs_conf->double_round << DOUBLE_ROUND_shift)); 475 476 if (vs_conf->allow_sdi) 477 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit; 478 if (vs_conf->allow_sd0) 479 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit; 480 if (vs_conf->allow_ddi) 481 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit; 482 if (vs_conf->allow_ddo) 483 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit; 484 485 /* flush SQ cache */ 486 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 487 vs_conf->shader_size, vs_conf->shader_addr, 488 vs_conf->bo, domain, 0); 489 490 BEGIN_BATCH(3 + 2); 491 EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 492 RELOC_BATCH(vs_conf->bo, domain, 0); 493 END_BATCH(); 494 495 BEGIN_BATCH(4); 496 PACK0(SQ_PGM_RESOURCES_VS, 2); 497 E32(sq_pgm_resources); 498 E32(sq_pgm_resources_2); 499 END_BATCH(); 500} 501 502/* cayman has some minor differences in SQ_PGM_RESOUCES_PS and _RESOURCES_2_PS, 503 * but none that we use here. 
504 */ 505void 506evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain) 507{ 508 RADEONInfoPtr info = RADEONPTR(pScrn); 509 uint32_t sq_pgm_resources, sq_pgm_resources_2; 510 511 sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 512 (ps_conf->stack_size << STACK_SIZE_shift)); 513 514 if (ps_conf->dx10_clamp) 515 sq_pgm_resources |= DX10_CLAMP_bit; 516 if (ps_conf->uncached_first_inst) 517 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 518 if (ps_conf->clamp_consts) 519 sq_pgm_resources |= CLAMP_CONSTS_bit; 520 521 sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) | 522 (ps_conf->double_round << DOUBLE_ROUND_shift)); 523 524 if (ps_conf->allow_sdi) 525 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit; 526 if (ps_conf->allow_sd0) 527 sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit; 528 if (ps_conf->allow_ddi) 529 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit; 530 if (ps_conf->allow_ddo) 531 sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit; 532 533 /* flush SQ cache */ 534 evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 535 ps_conf->shader_size, ps_conf->shader_addr, 536 ps_conf->bo, domain, 0); 537 538 BEGIN_BATCH(3 + 2); 539 EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 540 RELOC_BATCH(ps_conf->bo, domain, 0); 541 END_BATCH(); 542 543 BEGIN_BATCH(5); 544 PACK0(SQ_PGM_RESOURCES_PS, 3); 545 E32(sq_pgm_resources); 546 E32(sq_pgm_resources_2); 547 E32(ps_conf->export_mode); 548 END_BATCH(); 549} 550 551void 552evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain) 553{ 554 RADEONInfoPtr info = RADEONPTR(pScrn); 555 /* size reg is units of 16 consts (4 dwords each) */ 556 uint32_t size = const_conf->size_bytes >> 8; 557 558 if (size == 0) 559 size = 1; 560 561#if X_BYTE_ORDER == X_BIG_ENDIAN 562 { 563 uint32_t count = size << 6, *p = const_conf->cpu_ptr; 564 565 while(count--) { 566 *p = cpu_to_le32(*p); 567 p++; 568 } 569 } 570#endif 571 572 /* flush SQ cache */ 573 
evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 574 const_conf->size_bytes, const_conf->const_addr, 575 const_conf->bo, domain, 0); 576 577 switch (const_conf->type) { 578 case SHADER_TYPE_VS: 579 BEGIN_BATCH(3); 580 EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size); 581 END_BATCH(); 582 BEGIN_BATCH(3 + 2); 583 EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8); 584 RELOC_BATCH(const_conf->bo, domain, 0); 585 END_BATCH(); 586 break; 587 case SHADER_TYPE_PS: 588 BEGIN_BATCH(3); 589 EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size); 590 END_BATCH(); 591 BEGIN_BATCH(3 + 2); 592 EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8); 593 RELOC_BATCH(const_conf->bo, domain, 0); 594 END_BATCH(); 595 break; 596 default: 597 ErrorF("Unsupported const type %d\n", const_conf->type); 598 break; 599 } 600 601} 602 603void 604evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val) 605{ 606 RADEONInfoPtr info = RADEONPTR(pScrn); 607 /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each 608 * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs. 609 */ 610 BEGIN_BATCH(3); 611 EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 612 END_BATCH(); 613} 614 615/* cayman has some minor differences in SQ_VTX_CONSTANT_WORD2_0 and _WORD3_0, 616 * but none that we use here. 
617 */ 618static void 619evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain) 620{ 621 RADEONInfoPtr info = RADEONPTR(pScrn); 622 struct radeon_accel_state *accel_state = info->accel_state; 623 uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4; 624 625 sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 626 ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 627 (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 628 (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 629 (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 630 if (res->clamp_x) 631 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 632 633 if (res->format_comp_all) 634 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 635 636 if (res->srf_mode_all) 637 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 638 639 sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) | 640 (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) | 641 (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) | 642 (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift)); 643 644 if (res->uncached) 645 sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit; 646 647 /* XXX ??? 
*/ 648 sq_vtx_constant_word4 = 0; 649 650 /* flush vertex cache */ 651 if ((info->ChipFamily == CHIP_FAMILY_CEDAR) || 652 (info->ChipFamily == CHIP_FAMILY_PALM) || 653 (info->ChipFamily == CHIP_FAMILY_SUMO) || 654 (info->ChipFamily == CHIP_FAMILY_SUMO2) || 655 (info->ChipFamily == CHIP_FAMILY_CAICOS) || 656 (info->ChipFamily == CHIP_FAMILY_CAYMAN) || 657 (info->ChipFamily == CHIP_FAMILY_ARUBA)) 658 evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 659 accel_state->vbo.vb_offset, 0, 660 res->bo, 661 domain, 0); 662 else 663 evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit, 664 accel_state->vbo.vb_offset, 0, 665 res->bo, 666 domain, 0); 667 668 BEGIN_BATCH(10 + 2); 669 PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8); 670 E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 671 E32((res->vtx_num_entries << 2) - 1); // 1: SIZE 672 E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 673 E32(sq_vtx_constant_word3); // 3: swizzles 674 E32(sq_vtx_constant_word4); // 4: num elements 675 E32(0); // 5: n/a 676 E32(0); // 6: n/a 677 E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift); // 7: TYPE 678 RELOC_BATCH(res->bo, domain, 0); 679 END_BATCH(); 680} 681 682/* cayman has some minor differences in SQ_TEX_CONSTANT_WORD0_0 and _WORD4_0, 683 * but none that we use here. 
684 */ 685void 686evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain) 687{ 688 RADEONInfoPtr info = RADEONPTR(pScrn); 689 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 690 uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7; 691 uint32_t array_mode, pitch, tile_split, macro_aspect, bankw, bankh, nbanks; 692 693 if (tex_res->surface) { 694 switch (tex_res->surface->level[0].mode) { 695 case RADEON_SURF_MODE_1D: 696 array_mode = 2; 697 break; 698 case RADEON_SURF_MODE_2D: 699 array_mode = 4; 700 break; 701 default: 702 array_mode = 0; 703 break; 704 } 705 pitch = tex_res->surface->level[0].nblk_x >> 3; 706 tile_split = tex_res->surface->tile_split; 707 macro_aspect = tex_res->surface->mtilea; 708 bankw = tex_res->surface->bankw; 709 bankh = tex_res->surface->bankh; 710 tile_split = eg_tile_split(tile_split); 711 macro_aspect = eg_macro_tile_aspect(macro_aspect); 712 bankw = eg_bank_wh(bankw); 713 bankh = eg_bank_wh(bankh); 714 } else { 715 array_mode = tex_res->array_mode; 716 pitch = (tex_res->pitch + 7) >> 3; 717 tile_split = 4; 718 macro_aspect = 0; 719 bankw = 0; 720 bankh = 0; 721 } 722 nbanks = info->num_banks; 723 nbanks = eg_nbanks(nbanks); 724 725 sq_tex_resource_word0 = (tex_res->dim << DIM_shift); 726 727 if (tex_res->w) 728 sq_tex_resource_word0 |= ( ((pitch - 1) << PITCH_shift) | 729 ((tex_res->w - 1) << TEX_WIDTH_shift) ); 730 731 if (tex_res->tile_type) 732 sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit; 733 734 sq_tex_resource_word1 = (array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift); 735 736 if (tex_res->h) 737 sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 738 if (tex_res->depth) 739 sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 740 741 sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 742 (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 743 
(tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 744 (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 745 (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 746 (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 747 (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 748 (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 749 (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 750 (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 751 (tex_res->base_level << BASE_LEVEL_shift)); 752 753 if (tex_res->srf_mode_all) 754 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 755 if (tex_res->force_degamma) 756 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 757 758 sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 759 (tex_res->base_array << BASE_ARRAY_shift) | 760 (tex_res->last_array << LAST_ARRAY_shift)); 761 762 sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) | 763 (tex_res->perf_modulation << PERF_MODULATION_shift) | 764 (tile_split << SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift)); 765 766 if (tex_res->interlaced) 767 sq_tex_resource_word6 |= INTERLACED_bit; 768 769 sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) | 770 (macro_aspect << SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift) | 771 (nbanks << SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift) | 772 (bankw << SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift) | 773 (bankh << SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift) | 774 (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift)); 775 776 /* flush texture cache */ 777 evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 778 tex_res->size, tex_res->base, 779 tex_res->bo, domain, 0); 780 781 BEGIN_BATCH(10 + 4); 782 PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8); 783 E32(sq_tex_resource_word0); 784 
E32(sq_tex_resource_word1); 785 E32(((tex_res->base) >> 8)); 786 E32(((tex_res->mip_base) >> 8)); 787 E32(sq_tex_resource_word4); 788 E32(sq_tex_resource_word5); 789 E32(sq_tex_resource_word6); 790 E32(sq_tex_resource_word7); 791 RELOC_BATCH(tex_res->bo, domain, 0); 792 RELOC_BATCH(tex_res->mip_bo, domain, 0); 793 END_BATCH(); 794} 795 796/* cayman has some minor differences in SQ_TEX_SAMPLER_WORD0_0, 797 * but none that we use here. 798 */ 799void 800evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s) 801{ 802 RADEONInfoPtr info = RADEONPTR(pScrn); 803 uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 804 805 sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 806 (s->clamp_y << CLAMP_Y_shift) | 807 (s->clamp_z << CLAMP_Z_shift) | 808 (s->xy_mag_filter << XY_MAG_FILTER_shift) | 809 (s->xy_min_filter << XY_MIN_FILTER_shift) | 810 (s->z_filter << Z_FILTER_shift) | 811 (s->mip_filter << MIP_FILTER_shift) | 812 (s->border_color << BORDER_COLOR_TYPE_shift) | 813 (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 814 (s->chroma_key << CHROMA_KEY_shift)); 815 816 sq_tex_sampler_word1 = ((s->min_lod << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift) | 817 (s->max_lod << MAX_LOD_shift) | 818 (s->perf_mip << PERF_MIP_shift) | 819 (s->perf_z << PERF_Z_shift)); 820 821 822 sq_tex_sampler_word2 = ((s->lod_bias << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) | 823 (s->lod_bias2 << LOD_BIAS_SEC_shift)); 824 825 if (s->mc_coord_truncate) 826 sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 827 if (s->force_degamma) 828 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 829 if (s->truncate_coord) 830 sq_tex_sampler_word2 |= TRUNCATE_COORD_bit; 831 if (s->disable_cube_wrap) 832 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit; 833 if (s->type) 834 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 835 836 BEGIN_BATCH(5); 837 PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 
3); 838 E32(sq_tex_sampler_word0); 839 E32(sq_tex_sampler_word1); 840 E32(sq_tex_sampler_word2); 841 END_BATCH(); 842} 843 844/* workarounds for hw bugs in eg+ */ 845/* only affects screen/window/generic/vport. cliprects are not affected */ 846static void 847evergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn, int *x1, int *y1, int *x2, int *y2) 848{ 849 RADEONInfoPtr info = RADEONPTR(pScrn); 850 851 /* all eg+ asics */ 852 if (*x2 == 0) 853 *x1 = 1; 854 if (*y2 == 0) 855 *y1 = 1; 856 857 /* cayman/tn only */ 858 if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) { 859 /* cliprects aren't affected so we can use them to clip if we need 860 * a true 1x1 clip region 861 */ 862 if ((*x2 == 1) && (*y2 == 1)) 863 *x2 = 2; 864 } 865} 866 867//XXX deal with clip offsets in clip setup 868void 869evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 870{ 871 RADEONInfoPtr info = RADEONPTR(pScrn); 872 873 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2); 874 875 BEGIN_BATCH(4); 876 PACK0(PA_SC_SCREEN_SCISSOR_TL, 2); 877 E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 878 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 879 E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | 880 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 881 END_BATCH(); 882} 883 884void 885evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 886{ 887 RADEONInfoPtr info = RADEONPTR(pScrn); 888 889 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2); 890 891 BEGIN_BATCH(4); 892 PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); 893 E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 894 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 895 WINDOW_OFFSET_DISABLE_bit)); 896 E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 897 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 898 END_BATCH(); 899} 900 901void 902evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 903{ 904 RADEONInfoPtr 
info = RADEONPTR(pScrn); 905 906 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2); 907 908 BEGIN_BATCH(4); 909 PACK0(PA_SC_GENERIC_SCISSOR_TL, 2); 910 E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 911 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 912 WINDOW_OFFSET_DISABLE_bit)); 913 E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 914 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 915 END_BATCH(); 916} 917 918void 919evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 920{ 921 RADEONInfoPtr info = RADEONPTR(pScrn); 922 923 evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2); 924 925 BEGIN_BATCH(4); 926 PACK0(PA_SC_WINDOW_SCISSOR_TL, 2); 927 E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 928 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 929 WINDOW_OFFSET_DISABLE_bit)); 930 E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 931 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 932 END_BATCH(); 933} 934 935void 936evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 937{ 938 RADEONInfoPtr info = RADEONPTR(pScrn); 939 940 BEGIN_BATCH(4); 941 PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2); 942 E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 943 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 944 E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | 945 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 946 END_BATCH(); 947} 948 949/* 950 * Setup of default state 951 */ 952 953void 954evergreen_set_default_state(ScrnInfoPtr pScrn) 955{ 956 tex_resource_t tex_res; 957 shader_config_t fs_conf; 958 sq_config_t sq_conf; 959 int i; 960 RADEONInfoPtr info = RADEONPTR(pScrn); 961 struct radeon_accel_state *accel_state = info->accel_state; 962 963 if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) { 964 cayman_set_default_state(pScrn); 965 return; 966 } 967 968 if (accel_state->XInited3D) 969 return; 970 971 memset(&tex_res, 0, sizeof(tex_resource_t)); 972 memset(&fs_conf, 0, 
sizeof(shader_config_t)); 973 974 accel_state->XInited3D = TRUE; 975 976 evergreen_start_3d(pScrn); 977 978 /* SQ */ 979 sq_conf.ps_prio = 0; 980 sq_conf.vs_prio = 1; 981 sq_conf.gs_prio = 2; 982 sq_conf.es_prio = 3; 983 sq_conf.hs_prio = 0; 984 sq_conf.ls_prio = 0; 985 sq_conf.cs_prio = 0; 986 987 switch (info->ChipFamily) { 988 case CHIP_FAMILY_CEDAR: 989 default: 990 sq_conf.num_ps_gprs = 93; 991 sq_conf.num_vs_gprs = 46; 992 sq_conf.num_temp_gprs = 4; 993 sq_conf.num_gs_gprs = 31; 994 sq_conf.num_es_gprs = 31; 995 sq_conf.num_hs_gprs = 23; 996 sq_conf.num_ls_gprs = 23; 997 sq_conf.num_ps_threads = 96; 998 sq_conf.num_vs_threads = 16; 999 sq_conf.num_gs_threads = 16; 1000 sq_conf.num_es_threads = 16; 1001 sq_conf.num_hs_threads = 16; 1002 sq_conf.num_ls_threads = 16; 1003 sq_conf.num_ps_stack_entries = 42; 1004 sq_conf.num_vs_stack_entries = 42; 1005 sq_conf.num_gs_stack_entries = 42; 1006 sq_conf.num_es_stack_entries = 42; 1007 sq_conf.num_hs_stack_entries = 42; 1008 sq_conf.num_ls_stack_entries = 42; 1009 break; 1010 case CHIP_FAMILY_REDWOOD: 1011 sq_conf.num_ps_gprs = 93; 1012 sq_conf.num_vs_gprs = 46; 1013 sq_conf.num_temp_gprs = 4; 1014 sq_conf.num_gs_gprs = 31; 1015 sq_conf.num_es_gprs = 31; 1016 sq_conf.num_hs_gprs = 23; 1017 sq_conf.num_ls_gprs = 23; 1018 sq_conf.num_ps_threads = 128; 1019 sq_conf.num_vs_threads = 20; 1020 sq_conf.num_gs_threads = 20; 1021 sq_conf.num_es_threads = 20; 1022 sq_conf.num_hs_threads = 20; 1023 sq_conf.num_ls_threads = 20; 1024 sq_conf.num_ps_stack_entries = 42; 1025 sq_conf.num_vs_stack_entries = 42; 1026 sq_conf.num_gs_stack_entries = 42; 1027 sq_conf.num_es_stack_entries = 42; 1028 sq_conf.num_hs_stack_entries = 42; 1029 sq_conf.num_ls_stack_entries = 42; 1030 break; 1031 case CHIP_FAMILY_JUNIPER: 1032 sq_conf.num_ps_gprs = 93; 1033 sq_conf.num_vs_gprs = 46; 1034 sq_conf.num_temp_gprs = 4; 1035 sq_conf.num_gs_gprs = 31; 1036 sq_conf.num_es_gprs = 31; 1037 sq_conf.num_hs_gprs = 23; 1038 sq_conf.num_ls_gprs = 23; 1039 
sq_conf.num_ps_threads = 128; 1040 sq_conf.num_vs_threads = 20; 1041 sq_conf.num_gs_threads = 20; 1042 sq_conf.num_es_threads = 20; 1043 sq_conf.num_hs_threads = 20; 1044 sq_conf.num_ls_threads = 20; 1045 sq_conf.num_ps_stack_entries = 85; 1046 sq_conf.num_vs_stack_entries = 85; 1047 sq_conf.num_gs_stack_entries = 85; 1048 sq_conf.num_es_stack_entries = 85; 1049 sq_conf.num_hs_stack_entries = 85; 1050 sq_conf.num_ls_stack_entries = 85; 1051 break; 1052 case CHIP_FAMILY_CYPRESS: 1053 case CHIP_FAMILY_HEMLOCK: 1054 sq_conf.num_ps_gprs = 93; 1055 sq_conf.num_vs_gprs = 46; 1056 sq_conf.num_temp_gprs = 4; 1057 sq_conf.num_gs_gprs = 31; 1058 sq_conf.num_es_gprs = 31; 1059 sq_conf.num_hs_gprs = 23; 1060 sq_conf.num_ls_gprs = 23; 1061 sq_conf.num_ps_threads = 128; 1062 sq_conf.num_vs_threads = 20; 1063 sq_conf.num_gs_threads = 20; 1064 sq_conf.num_es_threads = 20; 1065 sq_conf.num_hs_threads = 20; 1066 sq_conf.num_ls_threads = 20; 1067 sq_conf.num_ps_stack_entries = 85; 1068 sq_conf.num_vs_stack_entries = 85; 1069 sq_conf.num_gs_stack_entries = 85; 1070 sq_conf.num_es_stack_entries = 85; 1071 sq_conf.num_hs_stack_entries = 85; 1072 sq_conf.num_ls_stack_entries = 85; 1073 break; 1074 case CHIP_FAMILY_PALM: 1075 sq_conf.num_ps_gprs = 93; 1076 sq_conf.num_vs_gprs = 46; 1077 sq_conf.num_temp_gprs = 4; 1078 sq_conf.num_gs_gprs = 31; 1079 sq_conf.num_es_gprs = 31; 1080 sq_conf.num_hs_gprs = 23; 1081 sq_conf.num_ls_gprs = 23; 1082 sq_conf.num_ps_threads = 96; 1083 sq_conf.num_vs_threads = 16; 1084 sq_conf.num_gs_threads = 16; 1085 sq_conf.num_es_threads = 16; 1086 sq_conf.num_hs_threads = 16; 1087 sq_conf.num_ls_threads = 16; 1088 sq_conf.num_ps_stack_entries = 42; 1089 sq_conf.num_vs_stack_entries = 42; 1090 sq_conf.num_gs_stack_entries = 42; 1091 sq_conf.num_es_stack_entries = 42; 1092 sq_conf.num_hs_stack_entries = 42; 1093 sq_conf.num_ls_stack_entries = 42; 1094 break; 1095 case CHIP_FAMILY_SUMO: 1096 sq_conf.num_ps_gprs = 93; 1097 sq_conf.num_vs_gprs = 46; 1098 
sq_conf.num_temp_gprs = 4; 1099 sq_conf.num_gs_gprs = 31; 1100 sq_conf.num_es_gprs = 31; 1101 sq_conf.num_hs_gprs = 23; 1102 sq_conf.num_ls_gprs = 23; 1103 sq_conf.num_ps_threads = 96; 1104 sq_conf.num_vs_threads = 25; 1105 sq_conf.num_gs_threads = 25; 1106 sq_conf.num_es_threads = 25; 1107 sq_conf.num_hs_threads = 25; 1108 sq_conf.num_ls_threads = 25; 1109 sq_conf.num_ps_stack_entries = 42; 1110 sq_conf.num_vs_stack_entries = 42; 1111 sq_conf.num_gs_stack_entries = 42; 1112 sq_conf.num_es_stack_entries = 42; 1113 sq_conf.num_hs_stack_entries = 42; 1114 sq_conf.num_ls_stack_entries = 42; 1115 break; 1116 case CHIP_FAMILY_SUMO2: 1117 sq_conf.num_ps_gprs = 93; 1118 sq_conf.num_vs_gprs = 46; 1119 sq_conf.num_temp_gprs = 4; 1120 sq_conf.num_gs_gprs = 31; 1121 sq_conf.num_es_gprs = 31; 1122 sq_conf.num_hs_gprs = 23; 1123 sq_conf.num_ls_gprs = 23; 1124 sq_conf.num_ps_threads = 96; 1125 sq_conf.num_vs_threads = 25; 1126 sq_conf.num_gs_threads = 25; 1127 sq_conf.num_es_threads = 25; 1128 sq_conf.num_hs_threads = 25; 1129 sq_conf.num_ls_threads = 25; 1130 sq_conf.num_ps_stack_entries = 85; 1131 sq_conf.num_vs_stack_entries = 85; 1132 sq_conf.num_gs_stack_entries = 85; 1133 sq_conf.num_es_stack_entries = 85; 1134 sq_conf.num_hs_stack_entries = 85; 1135 sq_conf.num_ls_stack_entries = 85; 1136 break; 1137 case CHIP_FAMILY_BARTS: 1138 sq_conf.num_ps_gprs = 93; 1139 sq_conf.num_vs_gprs = 46; 1140 sq_conf.num_temp_gprs = 4; 1141 sq_conf.num_gs_gprs = 31; 1142 sq_conf.num_es_gprs = 31; 1143 sq_conf.num_hs_gprs = 23; 1144 sq_conf.num_ls_gprs = 23; 1145 sq_conf.num_ps_threads = 128; 1146 sq_conf.num_vs_threads = 20; 1147 sq_conf.num_gs_threads = 20; 1148 sq_conf.num_es_threads = 20; 1149 sq_conf.num_hs_threads = 20; 1150 sq_conf.num_ls_threads = 20; 1151 sq_conf.num_ps_stack_entries = 85; 1152 sq_conf.num_vs_stack_entries = 85; 1153 sq_conf.num_gs_stack_entries = 85; 1154 sq_conf.num_es_stack_entries = 85; 1155 sq_conf.num_hs_stack_entries = 85; 1156 sq_conf.num_ls_stack_entries = 
85; 1157 break; 1158 case CHIP_FAMILY_TURKS: 1159 sq_conf.num_ps_gprs = 93; 1160 sq_conf.num_vs_gprs = 46; 1161 sq_conf.num_temp_gprs = 4; 1162 sq_conf.num_gs_gprs = 31; 1163 sq_conf.num_es_gprs = 31; 1164 sq_conf.num_hs_gprs = 23; 1165 sq_conf.num_ls_gprs = 23; 1166 sq_conf.num_ps_threads = 128; 1167 sq_conf.num_vs_threads = 20; 1168 sq_conf.num_gs_threads = 20; 1169 sq_conf.num_es_threads = 20; 1170 sq_conf.num_hs_threads = 20; 1171 sq_conf.num_ls_threads = 20; 1172 sq_conf.num_ps_stack_entries = 42; 1173 sq_conf.num_vs_stack_entries = 42; 1174 sq_conf.num_gs_stack_entries = 42; 1175 sq_conf.num_es_stack_entries = 42; 1176 sq_conf.num_hs_stack_entries = 42; 1177 sq_conf.num_ls_stack_entries = 42; 1178 break; 1179 case CHIP_FAMILY_CAICOS: 1180 sq_conf.num_ps_gprs = 93; 1181 sq_conf.num_vs_gprs = 46; 1182 sq_conf.num_temp_gprs = 4; 1183 sq_conf.num_gs_gprs = 31; 1184 sq_conf.num_es_gprs = 31; 1185 sq_conf.num_hs_gprs = 23; 1186 sq_conf.num_ls_gprs = 23; 1187 sq_conf.num_ps_threads = 128; 1188 sq_conf.num_vs_threads = 10; 1189 sq_conf.num_gs_threads = 10; 1190 sq_conf.num_es_threads = 10; 1191 sq_conf.num_hs_threads = 10; 1192 sq_conf.num_ls_threads = 10; 1193 sq_conf.num_ps_stack_entries = 42; 1194 sq_conf.num_vs_stack_entries = 42; 1195 sq_conf.num_gs_stack_entries = 42; 1196 sq_conf.num_es_stack_entries = 42; 1197 sq_conf.num_hs_stack_entries = 42; 1198 sq_conf.num_ls_stack_entries = 42; 1199 break; 1200 } 1201 1202 evergreen_sq_setup(pScrn, &sq_conf); 1203 1204 BEGIN_BATCH(27); 1205 EREG(SQ_LDS_ALLOC_PS, 0); 1206 EREG(SQ_LDS_RESOURCE_MGMT, 0x10001000); 1207 EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0); 1208 1209 PACK0(SQ_ESGS_RING_ITEMSIZE, 6); 1210 E32(0); 1211 E32(0); 1212 E32(0); 1213 E32(0); 1214 E32(0); 1215 E32(0); 1216 1217 PACK0(SQ_GS_VERT_ITEMSIZE, 4); 1218 E32(0); 1219 E32(0); 1220 E32(0); 1221 E32(0); 1222 1223 PACK0(SQ_VTX_BASE_VTX_LOC, 2); 1224 E32(0); 1225 E32(0); 1226 END_BATCH(); 1227 1228 /* DB */ 1229 BEGIN_BATCH(3 + 2); 1230 EREG(DB_Z_INFO, 0); 1231 
RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 1232 END_BATCH(); 1233 1234 BEGIN_BATCH(3 + 2); 1235 EREG(DB_STENCIL_INFO, 0); 1236 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 1237 END_BATCH(); 1238 1239 BEGIN_BATCH(3 + 2); 1240 EREG(DB_HTILE_DATA_BASE, 0); 1241 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 1242 END_BATCH(); 1243 1244 BEGIN_BATCH(49); 1245 EREG(DB_DEPTH_CONTROL, 0); 1246 1247 PACK0(PA_SC_VPORT_ZMIN_0, 2); 1248 EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0 1249 EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0 1250 1251 PACK0(DB_RENDER_CONTROL, 5); 1252 E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL 1253 E32(0); // DB_COUNT_CONTROL 1254 E32(0); // DB_DEPTH_VIEW 1255 E32(0x2a); // DB_RENDER_OVERRIDE 1256 E32(0); // DB_RENDER_OVERRIDE2 1257 1258 PACK0(DB_STENCIL_CLEAR, 2); 1259 E32(0); // DB_STENCIL_CLEAR 1260 E32(0); // DB_DEPTH_CLEAR 1261 1262 EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | 1263 (2 << ALPHA_TO_MASK_OFFSET1_shift) | 1264 (2 << ALPHA_TO_MASK_OFFSET2_shift) | 1265 (2 << ALPHA_TO_MASK_OFFSET3_shift))); 1266 1267 EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) | 1268 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 1269 1270 // SX 1271 EREG(SX_MISC, 0); 1272 1273 // CB 1274 PACK0(SX_ALPHA_TEST_CONTROL, 5); 1275 E32(0); // SX_ALPHA_TEST_CONTROL 1276 E32(0x00000000); //CB_BLEND_RED 1277 E32(0x00000000); //CB_BLEND_GREEN 1278 E32(0x00000000); //CB_BLEND_BLUE 1279 E32(0x00000000); //CB_BLEND_ALPHA 1280 1281 EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask); 1282 1283 // SC 1284 EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | 1285 (0 << WINDOW_Y_OFFSET_shift))); 1286 EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); 1287 EREG(PA_SC_EDGERULE, 0xAAAAAAAA); 1288 EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0); 1289 END_BATCH(); 1290 1291 /* clip boolean is set to always visible -> doesn't matter */ 1292 for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) 
1293 evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192); 1294 1295 for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) 1296 evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192); 1297 1298 BEGIN_BATCH(57); 1299 PACK0(PA_SC_MODE_CNTL_0, 2); 1300 E32(0); // PA_SC_MODE_CNTL_0 1301 E32(0); // PA_SC_MODE_CNTL_1 1302 1303 PACK0(PA_SC_LINE_CNTL, 16); 1304 E32(0); // PA_SC_LINE_CNTL 1305 E32(0); // PA_SC_AA_CONFIG 1306 E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) | 1307 PIX_CENTER_bit)); // PA_SU_VTX_CNTL 1308 EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ 1309 EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ 1310 EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ 1311 EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ 1312 E32(0); // PA_SC_AA_SAMPLE_LOCS_0 1313 E32(0); 1314 E32(0); 1315 E32(0); 1316 E32(0); 1317 E32(0); 1318 E32(0); 1319 E32(0); // PA_SC_AA_SAMPLE_LOCS_7 1320 E32(0xFFFFFFFF); // PA_SC_AA_MASK 1321 1322 // CL 1323 PACK0(PA_CL_CLIP_CNTL, 8); 1324 E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL 1325 E32(FACE_bit); // PA_SU_SC_MODE_CNTL 1326 E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL 1327 E32(0); // PA_CL_VS_OUT_CNTL 1328 E32(0); // PA_CL_NANINF_CNTL 1329 E32(0); // PA_SU_LINE_STIPPLE_CNTL 1330 E32(0); // PA_SU_LINE_STIPPLE_SCALE 1331 E32(0); // PA_SU_PRIM_FILTER_CNTL 1332 1333 // SU 1334 PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); 1335 E32(0); 1336 E32(0); 1337 E32(0); 1338 E32(0); 1339 E32(0); 1340 E32(0); 1341 1342 /* src = semantic id 0; mask = semantic id 1 */ 1343 EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | 1344 (1 << SEMANTIC_1_shift))); 1345 PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2); 1346 /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ 1347 E32(((0 << SEMANTIC_shift) | 1348 (0x01 << DEFAULT_VAL_shift))); 1349 /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ 1350 E32(((1 << SEMANTIC_shift) | 1351 (0x01 << DEFAULT_VAL_shift))); 1352 1353 PACK0(SPI_INPUT_Z, 8); 1354 E32(0); // SPI_INPUT_Z 1355 E32(0); // SPI_FOG_CNTL 1356 
E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL 1357 E32(0); // SPI_PS_IN_CONTROL_2 1358 E32(0); 1359 E32(0); 1360 E32(0); 1361 E32(0); 1362 END_BATCH(); 1363 1364 // clear FS 1365 fs_conf.bo = accel_state->shaders_bo; 1366 evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM); 1367 1368 // VGT 1369 BEGIN_BATCH(46); 1370 1371 PACK0(VGT_MAX_VTX_INDX, 4); 1372 E32(0xffffff); 1373 E32(0); 1374 E32(0); 1375 E32(0); 1376 1377 PACK0(VGT_INSTANCE_STEP_RATE_0, 2); 1378 E32(0); 1379 E32(0); 1380 1381 PACK0(VGT_REUSE_OFF, 2); 1382 E32(0); 1383 E32(0); 1384 1385 PACK0(PA_SU_POINT_SIZE, 17); 1386 E32(0); // PA_SU_POINT_SIZE 1387 E32(0); // PA_SU_POINT_MINMAX 1388 E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL 1389 E32(0); // PA_SC_LINE_STIPPLE 1390 E32(0); // VGT_OUTPUT_PATH_CNTL 1391 E32(0); // VGT_HOS_CNTL 1392 E32(0); 1393 E32(0); 1394 E32(0); 1395 E32(0); 1396 E32(0); 1397 E32(0); 1398 E32(0); 1399 E32(0); 1400 E32(0); 1401 E32(0); 1402 E32(0); // VGT_GS_MODE 1403 1404 EREG(VGT_PRIMITIVEID_EN, 0); 1405 EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0); 1406 EREG(VGT_SHADER_STAGES_EN, 0); 1407 1408 PACK0(VGT_STRMOUT_CONFIG, 2); 1409 E32(0); 1410 E32(0); 1411 END_BATCH(); 1412} 1413 1414 1415/* 1416 * Commands 1417 */ 1418 1419void 1420evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf) 1421{ 1422 RADEONInfoPtr info = RADEONPTR(pScrn); 1423 1424 BEGIN_BATCH(10); 1425 EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1426 PACK3(IT_INDEX_TYPE, 1); 1427#if X_BYTE_ORDER == X_BIG_ENDIAN 1428 E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1429#else 1430 E32(draw_conf->index_type); 1431#endif 1432 PACK3(IT_NUM_INSTANCES, 1); 1433 E32(draw_conf->num_instances); 1434 PACK3(IT_DRAW_INDEX_AUTO, 2); 1435 E32(draw_conf->num_indices); 1436 E32(draw_conf->vgt_draw_initiator); 1437 END_BATCH(); 1438} 1439 1440void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size) 1441{ 1442 
RADEONInfoPtr info = RADEONPTR(pScrn); 1443 struct radeon_accel_state *accel_state = info->accel_state; 1444 draw_config_t draw_conf; 1445 vtx_resource_t vtx_res; 1446 1447 if (accel_state->vbo.vb_start_op == -1) 1448 return; 1449 1450 CLEAR (draw_conf); 1451 CLEAR (vtx_res); 1452 1453 if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) { 1454 radeon_ib_discard(pScrn); 1455 radeon_cs_flush_indirect(pScrn); 1456 return; 1457 } 1458 1459 /* Vertex buffer setup */ 1460 accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op; 1461 vtx_res.id = SQ_FETCH_RESOURCE_vs; 1462 vtx_res.vtx_size_dw = vtx_size / 4; 1463 vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; 1464 vtx_res.vb_addr = accel_state->vbo.vb_start_op; 1465 vtx_res.bo = accel_state->vbo.vb_bo; 1466 vtx_res.dst_sel_x = SQ_SEL_X; 1467 vtx_res.dst_sel_y = SQ_SEL_Y; 1468 vtx_res.dst_sel_z = SQ_SEL_Z; 1469 vtx_res.dst_sel_w = SQ_SEL_W; 1470#if X_BYTE_ORDER == X_BIG_ENDIAN 1471 vtx_res.endian = SQ_ENDIAN_8IN32; 1472#endif 1473 evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT); 1474 1475 /* Draw */ 1476 draw_conf.prim_type = DI_PT_RECTLIST; 1477 draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 1478 draw_conf.num_instances = 1; 1479 draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 1480 draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 1481 1482 evergreen_draw_auto(pScrn, &draw_conf); 1483 1484 /* sync dst surface */ 1485 evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 1486 accel_state->dst_size, 0, 1487 accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); 1488 1489 accel_state->vbo.vb_start_op = -1; 1490 accel_state->cbuf.vb_start_op = -1; 1491 accel_state->ib_reset_op = 0; 1492 1493} 1494 1495