r6xx_accel.c revision de2362d3
1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: Alex Deucher <alexander.deucher@amd.com> 24 * Matthias Hopf <mhopf@suse.de> 25 */ 26#ifdef HAVE_CONFIG_H 27#include "config.h" 28#endif 29 30#include "xf86.h" 31 32#include <errno.h> 33 34#include "radeon.h" 35#include "r600_shader.h" 36#include "radeon_reg.h" 37#include "r600_reg.h" 38#include "r600_state.h" 39 40#include "radeon_vbo.h" 41#include "radeon_exa_shared.h" 42 43static const uint32_t R600_ROP[16] = { 44 RADEON_ROP3_ZERO, /* GXclear */ 45 RADEON_ROP3_DSa, /* Gxand */ 46 RADEON_ROP3_SDna, /* GXandReverse */ 47 RADEON_ROP3_S, /* GXcopy */ 48 RADEON_ROP3_DSna, /* GXandInverted */ 49 RADEON_ROP3_D, /* GXnoop */ 50 RADEON_ROP3_DSx, /* GXxor */ 51 RADEON_ROP3_DSo, /* GXor */ 52 RADEON_ROP3_DSon, /* GXnor */ 53 RADEON_ROP3_DSxn, /* GXequiv */ 54 RADEON_ROP3_Dn, /* GXinvert */ 55 RADEON_ROP3_SDno, /* GXorReverse */ 56 RADEON_ROP3_Sn, /* GXcopyInverted */ 57 RADEON_ROP3_DSno, /* GXorInverted */ 58 RADEON_ROP3_DSan, /* GXnand */ 59 RADEON_ROP3_ONE, /* GXset */ 60}; 61 62/* we try and batch operations together under KMS - 63 but it doesn't work yet without misrendering */ 64#define KMS_MULTI_OP 1 65 66/* Flush the indirect buffer to the kernel for submission to the card */ 67void R600CPFlushIndirect(ScrnInfoPtr pScrn) 68{ 69 radeon_cs_flush_indirect(pScrn); 70} 71 72void R600IBDiscard(ScrnInfoPtr pScrn) 73{ 74 radeon_ib_discard(pScrn); 75} 76 77void 78r600_wait_3d_idle_clean(ScrnInfoPtr pScrn) 79{ 80 RADEONInfoPtr info = RADEONPTR(pScrn); 81 82 //flush caches, don't generate timestamp 83 BEGIN_BATCH(5); 84 PACK3(IT_EVENT_WRITE, 1); 85 E32(CACHE_FLUSH_AND_INV_EVENT); 86 // wait for 3D idle clean 87 EREG(WAIT_UNTIL, (WAIT_3D_IDLE_bit | 88 WAIT_3D_IDLECLEAN_bit)); 89 END_BATCH(); 90} 91 92void 93r600_wait_3d_idle(ScrnInfoPtr pScrn) 94{ 95 RADEONInfoPtr info = RADEONPTR(pScrn); 96 97 BEGIN_BATCH(3); 98 EREG(WAIT_UNTIL, WAIT_3D_IDLE_bit); 99 END_BATCH(); 100} 101 102void 103r600_start_3d(ScrnInfoPtr pScrn) 104{ 105 RADEONInfoPtr info = RADEONPTR(pScrn); 106 107 if (info->ChipFamily < CHIP_FAMILY_RV770) { 108 BEGIN_BATCH(5); 109 PACK3(IT_START_3D_CMDBUF, 1); 110 E32(0); 111 } else 112 BEGIN_BATCH(3); 113 114 PACK3(IT_CONTEXT_CONTROL, 2); 115 E32(0x80000000); 116 E32(0x80000000); 117 END_BATCH(); 118 119} 120 121/* 122 * Setup of functional groups 123 */ 124 125// asic stack/thread/gpr limits - need to query the drm 126static void 127r600_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf) 128{ 129 uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 130 uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; 131 RADEONInfoPtr info = RADEONPTR(pScrn); 132 133 if ((info->ChipFamily == CHIP_FAMILY_RV610) || 134 (info->ChipFamily == CHIP_FAMILY_RV620) || 135 (info->ChipFamily == CHIP_FAMILY_RS780) || 136 (info->ChipFamily == CHIP_FAMILY_RS880) || 137 (info->ChipFamily == CHIP_FAMILY_RV710)) 138 sq_config = 0; // no VC 139 else 140 sq_config = VC_ENABLE_bit; 141 142 sq_config |= (DX9_CONSTS_bit | 143 ALU_INST_PREFER_VECTOR_bit | 144 (sq_conf->ps_prio << PS_PRIO_shift) | 145 (sq_conf->vs_prio << VS_PRIO_shift) | 146 (sq_conf->gs_prio << GS_PRIO_shift) | 147 (sq_conf->es_prio << ES_PRIO_shift)); 148 149 sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 150 (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 151 (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 152 sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 153 (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 154 155 sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 156 (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 157 (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 158 (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 159 160 sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 161 (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 162 163 sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 164 (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 165 166 BEGIN_BATCH(8); 167 PACK0(SQ_CONFIG, 6); 168 E32(sq_config); 169 E32(sq_gpr_resource_mgmt_1); 170 E32(sq_gpr_resource_mgmt_2); 171 E32(sq_thread_resource_mgmt); 172 E32(sq_stack_resource_mgmt_1); 173 E32(sq_stack_resource_mgmt_2); 174 END_BATCH(); 175} 176 177void 178r600_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain) 179{ 180 uint32_t cb_color_info, cb_color_control; 181 unsigned pitch, slice, h, array_mode; 182 RADEONInfoPtr info = RADEONPTR(pScrn); 183 184 185 if (cb_conf->surface) { 186 switch (cb_conf->surface->level[0].mode) { 187 case RADEON_SURF_MODE_1D: 188 array_mode = 2; 189 break; 190 case RADEON_SURF_MODE_2D: 191 array_mode = 4; 192 break; 193 default: 194 array_mode = 0; 195 break; 196 } 197 pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1; 198 slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1; 199 } else 200 { 201 array_mode = cb_conf->array_mode; 202 pitch = (cb_conf->w / 8) - 1; 203 h = RADEON_ALIGN(cb_conf->h, 8); 204 slice = ((cb_conf->w * h) / 64) - 1; 205 } 206 207 cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 208 (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 209 (array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 210 (cb_conf->number_type << NUMBER_TYPE_shift) | 211 (cb_conf->comp_swap << COMP_SWAP_shift) | 212 (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); 213 if (cb_conf->read_size) 214 cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; 215 if (cb_conf->blend_clamp) 216 cb_color_info |= BLEND_CLAMP_bit; 217 if (cb_conf->clear_color) 218 cb_color_info |= CLEAR_COLOR_bit; 219 if (cb_conf->blend_bypass) 220 cb_color_info |= BLEND_BYPASS_bit; 221 if (cb_conf->blend_float32) 222 cb_color_info |= BLEND_FLOAT32_bit; 223 if (cb_conf->simple_float) 224 cb_color_info |= SIMPLE_FLOAT_bit; 225 if (cb_conf->round_mode) 226 cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 227 if (cb_conf->tile_compact) 228 cb_color_info |= TILE_COMPACT_bit; 229 if (cb_conf->source_format) 230 cb_color_info |= SOURCE_FORMAT_bit; 231 232 BEGIN_BATCH(3 + 2); 233 EREG((CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); 234 RELOC_BATCH(cb_conf->bo, 0, domain); 235 END_BATCH(); 236 237 // rv6xx workaround 238 if ((info->ChipFamily > CHIP_FAMILY_R600) && 239 (info->ChipFamily < CHIP_FAMILY_RV770)) { 240 BEGIN_BATCH(2); 241 PACK3(IT_SURFACE_BASE_UPDATE, 1); 242 E32((2 << cb_conf->id)); 243 END_BATCH(); 244 } 245 /* Set CMASK & TILE buffer to the offset of color buffer as 246 * we don't use those this shouldn't cause any issue and we 247 * then have a valid cmd stream 248 */ 249 BEGIN_BATCH(3 + 2); 250 EREG((CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 251 RELOC_BATCH(cb_conf->bo, 0, domain); 252 END_BATCH(); 253 BEGIN_BATCH(3 + 2); 254 EREG((CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 255 RELOC_BATCH(cb_conf->bo, 0, domain); 256 END_BATCH(); 257 BEGIN_BATCH(9); 258 // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib 259 EREG((CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | 260 (slice << SLICE_TILE_MAX_shift))); 261 EREG((CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | 262 (0 << SLICE_MAX_shift))); 263 EREG((CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | 264 (0 << FMASK_TILE_MAX_shift))); 265 END_BATCH(); 266 267 BEGIN_BATCH(3 + 2); 268 EREG((CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); 269 RELOC_BATCH(cb_conf->bo, 0, domain); 270 END_BATCH(); 271 272 BEGIN_BATCH(9); 273 EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift)); 274 cb_color_control = R600_ROP[cb_conf->rop] | 275 (cb_conf->blend_enable << TARGET_BLEND_ENABLE_shift); 276 if (info->ChipFamily == CHIP_FAMILY_R600) { 277 /* no per-MRT blend on R600 */ 278 EREG(CB_COLOR_CONTROL, cb_color_control); 279 EREG(CB_BLEND_CONTROL, cb_conf->blendcntl); 280 } else { 281 if (cb_conf->blend_enable) 282 cb_color_control |= PER_MRT_BLEND_bit; 283 EREG(CB_COLOR_CONTROL, cb_color_control); 284 EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); 285 } 286 END_BATCH(); 287} 288 289static void 290r600_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type, 291 uint32_t size, uint64_t mc_addr, 292 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) 293{ 294 RADEONInfoPtr info = RADEONPTR(pScrn); 295 uint32_t cp_coher_size; 296 if (size == 0xffffffff) 297 cp_coher_size = 0xffffffff; 298 else 299 cp_coher_size = ((size + 255) >> 8); 300 301 BEGIN_BATCH(5 + 2); 302 PACK3(IT_SURFACE_SYNC, 4); 303 E32(sync_type); 304 E32(cp_coher_size); 305 E32((mc_addr >> 8)); 306 E32(10); /* poll interval */ 307 RELOC_BATCH(bo, rdomains, wdomain); 308 END_BATCH(); 309} 310 311/* inserts a wait for vline in the command stream */ 312void 313r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, 314 xf86CrtcPtr crtc, int start, int stop) 315{ 316 RADEONInfoPtr info = RADEONPTR(pScrn); 317 drmmode_crtc_private_ptr drmmode_crtc; 318 319 if (!crtc) 320 return; 321 322 if (!crtc->enabled) 323 return; 324 325 if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) 326 return; 327 328 start = max(start, crtc->y); 329 stop = min(stop, crtc->y + crtc->mode.VDisplay); 330 331 if (start >= stop) 332 return; 333 334 drmmode_crtc = crtc->driver_private; 335 336 BEGIN_BATCH(11); 337 /* set the VLINE range */ 338 EREG(AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */ 339 (start << AVIVO_D1MODE_VLINE_START_SHIFT) | 340 (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); 341 342 /* tell the CP to poll the VLINE state register */ 343 PACK3(IT_WAIT_REG_MEM, 6); 344 E32(IT_WAIT_REG | IT_WAIT_EQ); 345 E32(IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS)); 346 E32(0); 347 E32(0); // Ref value 348 E32(AVIVO_D1MODE_VLINE_STAT); // Mask 349 E32(10); // Wait interval 350 /* add crtc reloc */ 351 PACK3(IT_NOP, 1); 352 E32(drmmode_crtc->mode_crtc->crtc_id); 353 END_BATCH(); 354} 355 356void 357r600_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp) 358{ 359 RADEONInfoPtr info = RADEONPTR(pScrn); 360 361 BEGIN_BATCH(8); 362 /* Interpolator setup */ 363 EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift)); 364 PACK0(SPI_PS_IN_CONTROL_0, 3); 365 E32((num_interp << NUM_INTERP_shift)); 366 E32(0); 367 E32(0); 368 END_BATCH(); 369} 370 371void 372r600_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain) 373{ 374 RADEONInfoPtr info = RADEONPTR(pScrn); 375 uint32_t sq_pgm_resources; 376 377 sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 378 (fs_conf->stack_size << STACK_SIZE_shift)); 379 380 if (fs_conf->dx10_clamp) 381 sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; 382 383 BEGIN_BATCH(3 + 2); 384 EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 385 RELOC_BATCH(fs_conf->bo, domain, 0); 386 END_BATCH(); 387 388 BEGIN_BATCH(6); 389 EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources); 390 EREG(SQ_PGM_CF_OFFSET_FS, 0); 391 END_BATCH(); 392} 393 394void 395r600_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain) 396{ 397 RADEONInfoPtr info = RADEONPTR(pScrn); 398 uint32_t sq_pgm_resources; 399 400 sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 401 (vs_conf->stack_size << STACK_SIZE_shift)); 402 403 if (vs_conf->dx10_clamp) 404 sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; 405 if (vs_conf->fetch_cache_lines) 406 sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 407 if (vs_conf->uncached_first_inst) 408 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 409 410 /* flush SQ cache */ 411 r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 412 vs_conf->shader_size, vs_conf->shader_addr, 413 vs_conf->bo, domain, 0); 414 415 BEGIN_BATCH(3 + 2); 416 EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 417 RELOC_BATCH(vs_conf->bo, domain, 0); 418 END_BATCH(); 419 420 BEGIN_BATCH(6); 421 EREG(SQ_PGM_RESOURCES_VS, sq_pgm_resources); 422 EREG(SQ_PGM_CF_OFFSET_VS, 0); 423 END_BATCH(); 424} 425 426void 427r600_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain) 428{ 429 RADEONInfoPtr info = RADEONPTR(pScrn); 430 uint32_t sq_pgm_resources; 431 432 sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 433 (ps_conf->stack_size << STACK_SIZE_shift)); 434 435 if (ps_conf->dx10_clamp) 436 sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; 437 if (ps_conf->fetch_cache_lines) 438 sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 439 if (ps_conf->uncached_first_inst) 440 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 441 if (ps_conf->clamp_consts) 442 sq_pgm_resources |= CLAMP_CONSTS_bit; 443 444 /* flush SQ cache */ 445 r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 446 ps_conf->shader_size, ps_conf->shader_addr, 447 ps_conf->bo, domain, 0); 448 449 BEGIN_BATCH(3 + 2); 450 EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 451 RELOC_BATCH(ps_conf->bo, domain, 0); 452 END_BATCH(); 453 454 BEGIN_BATCH(9); 455 EREG(SQ_PGM_RESOURCES_PS, sq_pgm_resources); 456 EREG(SQ_PGM_EXPORTS_PS, ps_conf->export_mode); 457 EREG(SQ_PGM_CF_OFFSET_PS, 0); 458 END_BATCH(); 459} 460 461void 462r600_set_alu_consts(ScrnInfoPtr pScrn, int offset, int count, float *const_buf) 463{ 464 RADEONInfoPtr info = RADEONPTR(pScrn); 465 int i; 466 const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); 467 468 BEGIN_BATCH(2 + countreg); 469 PACK0(SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); 470 for (i = 0; i < countreg; i++) 471 EFLOAT(const_buf[i]); 472 END_BATCH(); 473} 474 475void 476r600_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val) 477{ 478 RADEONInfoPtr info = RADEONPTR(pScrn); 479 /* bool register order is: ps, vs, gs; one register each 480 * 1 bits per bool; 32 bools each for ps, vs, gs. 481 */ 482 BEGIN_BATCH(3); 483 EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 484 END_BATCH(); 485} 486 487static void 488r600_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain) 489{ 490 RADEONInfoPtr info = RADEONPTR(pScrn); 491 struct radeon_accel_state *accel_state = info->accel_state; 492 uint32_t sq_vtx_constant_word2; 493 494 sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 495 ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 496 (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 497 (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 498 (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 499 if (res->clamp_x) 500 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 501 502 if (res->format_comp_all) 503 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 504 505 if (res->srf_mode_all) 506 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 507 508 /* flush vertex cache */ 509 if ((info->ChipFamily == CHIP_FAMILY_RV610) || 510 (info->ChipFamily == CHIP_FAMILY_RV620) || 511 (info->ChipFamily == CHIP_FAMILY_RS780) || 512 (info->ChipFamily == CHIP_FAMILY_RS880) || 513 (info->ChipFamily == CHIP_FAMILY_RV710)) 514 r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 515 accel_state->vbo.vb_offset, 0, 516 res->bo, 517 domain, 0); 518 else 519 r600_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit, 520 accel_state->vbo.vb_offset, 0, 521 res->bo, 522 domain, 0); 523 524 BEGIN_BATCH(9 + 2); 525 PACK0(SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); 526 E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 527 E32((res->vtx_num_entries << 2) - 1); // 1: SIZE 528 E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 529 E32(res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? 530 E32(0); // 4: n/a 531 E32(0); // 5: n/a 532 E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE 533 RELOC_BATCH(res->bo, domain, 0); 534 END_BATCH(); 535} 536 537void 538r600_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain) 539{ 540 RADEONInfoPtr info = RADEONPTR(pScrn); 541 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 542 uint32_t sq_tex_resource_word5, sq_tex_resource_word6; 543 uint32_t array_mode, pitch; 544 545 if (tex_res->surface) { 546 switch (tex_res->surface->level[0].mode) { 547 case RADEON_SURF_MODE_1D: 548 array_mode = 2; 549 break; 550 case RADEON_SURF_MODE_2D: 551 array_mode = 4; 552 break; 553 default: 554 array_mode = 0; 555 break; 556 } 557 pitch = tex_res->surface->level[0].nblk_x >> 3; 558 } else 559 { 560 array_mode = tex_res->tile_mode; 561 pitch = (tex_res->pitch + 7) >> 3; 562 } 563 564 sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | 565 (array_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); 566 567 if (tex_res->w) 568 sq_tex_resource_word0 |= (((pitch - 1) << PITCH_shift) | 569 ((tex_res->w - 1) << TEX_WIDTH_shift)); 570 571 if (tex_res->tile_type) 572 sq_tex_resource_word0 |= TILE_TYPE_bit; 573 574 sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); 575 576 if (tex_res->h) 577 sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 578 if (tex_res->depth) 579 sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 580 581 sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 582 (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 583 (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 584 (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 585 (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 586 (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 587 (tex_res->request_size << REQUEST_SIZE_shift) | 588 (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 589 (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 590 (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 591 (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 592 (tex_res->base_level << BASE_LEVEL_shift)); 593 594 if (tex_res->srf_mode_all) 595 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 596 if (tex_res->force_degamma) 597 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 598 599 sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 600 (tex_res->base_array << BASE_ARRAY_shift) | 601 (tex_res->last_array << LAST_ARRAY_shift)); 602 603 sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | 604 (tex_res->perf_modulation << PERF_MODULATION_shift) | 605 (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); 606 607 if (tex_res->interlaced) 608 sq_tex_resource_word6 |= INTERLACED_bit; 609 610 /* flush texture cache */ 611 r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 612 tex_res->size, tex_res->base, 613 tex_res->bo, domain, 0); 614 615 BEGIN_BATCH(9 + 4); 616 PACK0(SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); 617 E32(sq_tex_resource_word0); 618 E32(sq_tex_resource_word1); 619 E32(((tex_res->base) >> 8)); 620 E32(((tex_res->mip_base) >> 8)); 621 E32(sq_tex_resource_word4); 622 E32(sq_tex_resource_word5); 623 E32(sq_tex_resource_word6); 624 RELOC_BATCH(tex_res->bo, domain, 0); 625 RELOC_BATCH(tex_res->mip_bo, domain, 0); 626 END_BATCH(); 627} 628 629void 630r600_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s) 631{ 632 RADEONInfoPtr info = RADEONPTR(pScrn); 633 uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 634 635 sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 636 (s->clamp_y << CLAMP_Y_shift) | 637 (s->clamp_z << CLAMP_Z_shift) | 638 (s->xy_mag_filter << XY_MAG_FILTER_shift) | 639 (s->xy_min_filter << XY_MIN_FILTER_shift) | 640 (s->z_filter << Z_FILTER_shift) | 641 (s->mip_filter << MIP_FILTER_shift) | 642 (s->border_color << BORDER_COLOR_TYPE_shift) | 643 (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 644 (s->chroma_key << CHROMA_KEY_shift)); 645 if (s->point_sampling_clamp) 646 sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; 647 if (s->tex_array_override) 648 sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; 649 if (s->lod_uses_minor_axis) 650 sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; 651 652 sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | 653 (s->max_lod << MAX_LOD_shift) | 654 (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); 655 656 sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | 657 (s->perf_mip << PERF_MIP_shift) | 658 (s->perf_z << PERF_Z_shift)); 659 if (s->mc_coord_truncate) 660 sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 661 if (s->force_degamma) 662 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 663 if (s->high_precision_filter) 664 sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; 665 if (s->fetch_4) 666 sq_tex_sampler_word2 |= FETCH_4_bit; 667 if (s->sample_is_pcf) 668 sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; 669 if (s->type) 670 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 671 672 BEGIN_BATCH(5); 673 PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); 674 E32(sq_tex_sampler_word0); 675 E32(sq_tex_sampler_word1); 676 E32(sq_tex_sampler_word2); 677 END_BATCH(); 678} 679 680//XXX deal with clip offsets in clip setup 681void 682r600_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 683{ 684 RADEONInfoPtr info = RADEONPTR(pScrn); 685 686 BEGIN_BATCH(4); 687 PACK0(PA_SC_SCREEN_SCISSOR_TL, 2); 688 E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 689 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 690 E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | 691 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 692 END_BATCH(); 693} 694 695void 696r600_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 697{ 698 RADEONInfoPtr info = RADEONPTR(pScrn); 699 700 BEGIN_BATCH(4); 701 PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); 702 E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 703 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 704 WINDOW_OFFSET_DISABLE_bit)); 705 E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 706 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 707 END_BATCH(); 708} 709 710void 711r600_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 712{ 713 RADEONInfoPtr info = RADEONPTR(pScrn); 714 715 BEGIN_BATCH(4); 716 PACK0(PA_SC_GENERIC_SCISSOR_TL, 2); 717 E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 718 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 719 WINDOW_OFFSET_DISABLE_bit)); 720 E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 721 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 722 END_BATCH(); 723} 724 725void 726r600_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 727{ 728 RADEONInfoPtr info = RADEONPTR(pScrn); 729 730 BEGIN_BATCH(4); 731 PACK0(PA_SC_WINDOW_SCISSOR_TL, 2); 732 E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 733 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 734 WINDOW_OFFSET_DISABLE_bit)); 735 E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 736 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 737 END_BATCH(); 738} 739 740void 741r600_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 742{ 743 RADEONInfoPtr info = RADEONPTR(pScrn); 744 745 BEGIN_BATCH(4); 746 PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2); 747 E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 748 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 749 E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | 750 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 751 END_BATCH(); 752} 753 754/* 755 * Setup of default state 756 */ 757 758void 759r600_set_default_state(ScrnInfoPtr pScrn) 760{ 761 tex_resource_t tex_res; 762 shader_config_t fs_conf; 763 sq_config_t sq_conf; 764 int i; 765 RADEONInfoPtr info = RADEONPTR(pScrn); 766 struct radeon_accel_state *accel_state = info->accel_state; 767 768 if (accel_state->XInited3D) 769 return; 770 771 memset(&tex_res, 0, sizeof(tex_resource_t)); 772 memset(&fs_conf, 0, sizeof(shader_config_t)); 773 774 accel_state->XInited3D = TRUE; 775 776 r600_start_3d(pScrn); 777 778 // SQ 779 sq_conf.ps_prio = 0; 780 sq_conf.vs_prio = 1; 781 sq_conf.gs_prio = 2; 782 sq_conf.es_prio = 3; 783 // need to set stack/thread/gpr limits based on the asic 784 // for now just set them low enough so any card will work 785 // see r600_cp.c in the drm 786 switch (info->ChipFamily) { 787 case CHIP_FAMILY_R600: 788 sq_conf.num_ps_gprs = 192; 789 sq_conf.num_vs_gprs = 56; 790 sq_conf.num_temp_gprs = 4; 791 sq_conf.num_gs_gprs = 0; 792 sq_conf.num_es_gprs = 0; 793 sq_conf.num_ps_threads = 136; 794 sq_conf.num_vs_threads = 48; 795 sq_conf.num_gs_threads = 4; 796 sq_conf.num_es_threads = 4; 797 sq_conf.num_ps_stack_entries = 128; 798 sq_conf.num_vs_stack_entries = 128; 799 sq_conf.num_gs_stack_entries = 0; 800 sq_conf.num_es_stack_entries = 0; 801 break; 802 case CHIP_FAMILY_RV630: 803 case CHIP_FAMILY_RV635: 804 sq_conf.num_ps_gprs = 84; 805 sq_conf.num_vs_gprs = 36; 806 sq_conf.num_temp_gprs = 4; 807 sq_conf.num_gs_gprs = 0; 808 sq_conf.num_es_gprs = 0; 809 sq_conf.num_ps_threads = 144; 810 sq_conf.num_vs_threads = 40; 811 sq_conf.num_gs_threads = 4; 812 sq_conf.num_es_threads = 4; 813 sq_conf.num_ps_stack_entries = 40; 814 sq_conf.num_vs_stack_entries = 40; 815 sq_conf.num_gs_stack_entries = 32; 816 sq_conf.num_es_stack_entries = 16; 817 break; 818 case CHIP_FAMILY_RV610: 819 case CHIP_FAMILY_RV620: 820 case CHIP_FAMILY_RS780: 821 case CHIP_FAMILY_RS880: 822 default: 823 sq_conf.num_ps_gprs = 84; 824 sq_conf.num_vs_gprs = 36; 825 sq_conf.num_temp_gprs = 4; 826 sq_conf.num_gs_gprs = 0; 827 sq_conf.num_es_gprs = 0; 828 sq_conf.num_ps_threads = 136; 829 sq_conf.num_vs_threads = 48; 830 sq_conf.num_gs_threads = 4; 831 sq_conf.num_es_threads = 4; 832 sq_conf.num_ps_stack_entries = 40; 833 sq_conf.num_vs_stack_entries = 40; 834 sq_conf.num_gs_stack_entries = 32; 835 sq_conf.num_es_stack_entries = 16; 836 break; 837 case CHIP_FAMILY_RV670: 838 sq_conf.num_ps_gprs = 144; 839 sq_conf.num_vs_gprs = 40; 840 sq_conf.num_temp_gprs = 4; 841 sq_conf.num_gs_gprs = 0; 842 sq_conf.num_es_gprs = 0; 843 sq_conf.num_ps_threads = 136; 844 sq_conf.num_vs_threads = 48; 845 sq_conf.num_gs_threads = 4; 846 sq_conf.num_es_threads = 4; 847 sq_conf.num_ps_stack_entries = 40; 848 sq_conf.num_vs_stack_entries = 40; 849 sq_conf.num_gs_stack_entries = 32; 850 sq_conf.num_es_stack_entries = 16; 851 break; 852 case CHIP_FAMILY_RV770: 853 sq_conf.num_ps_gprs = 192; 854 sq_conf.num_vs_gprs = 56; 855 sq_conf.num_temp_gprs = 4; 856 sq_conf.num_gs_gprs = 0; 857 sq_conf.num_es_gprs = 0; 858 sq_conf.num_ps_threads = 188; 859 sq_conf.num_vs_threads = 60; 860 sq_conf.num_gs_threads = 0; 861 sq_conf.num_es_threads = 0; 862 sq_conf.num_ps_stack_entries = 256; 863 sq_conf.num_vs_stack_entries = 256; 864 sq_conf.num_gs_stack_entries = 0; 865 sq_conf.num_es_stack_entries = 0; 866 break; 867 case CHIP_FAMILY_RV730: 868 case CHIP_FAMILY_RV740: 869 sq_conf.num_ps_gprs = 84; 870 sq_conf.num_vs_gprs = 36; 871 sq_conf.num_temp_gprs = 4; 872 sq_conf.num_gs_gprs = 0; 873 sq_conf.num_es_gprs = 0; 874 sq_conf.num_ps_threads = 188; 875 sq_conf.num_vs_threads = 60; 876 sq_conf.num_gs_threads = 0; 877 sq_conf.num_es_threads = 0; 878 sq_conf.num_ps_stack_entries = 128; 879 sq_conf.num_vs_stack_entries = 128; 880 sq_conf.num_gs_stack_entries = 0; 881 sq_conf.num_es_stack_entries = 0; 882 break; 883 case CHIP_FAMILY_RV710: 884 sq_conf.num_ps_gprs = 192; 885 sq_conf.num_vs_gprs = 56; 886 sq_conf.num_temp_gprs = 4; 887 sq_conf.num_gs_gprs = 0; 888 sq_conf.num_es_gprs = 0; 889 sq_conf.num_ps_threads = 144; 890 sq_conf.num_vs_threads = 48; 891 sq_conf.num_gs_threads = 0; 892 sq_conf.num_es_threads = 0; 893 sq_conf.num_ps_stack_entries = 128; 894 sq_conf.num_vs_stack_entries = 128; 895 sq_conf.num_gs_stack_entries = 0; 896 sq_conf.num_es_stack_entries = 0; 897 break; 898 } 899 900 r600_sq_setup(pScrn, &sq_conf); 901 902 /* set fake reloc for unused depth */ 903 BEGIN_BATCH(3 + 2); 904 EREG(DB_DEPTH_INFO, 0); 905 RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 906 END_BATCH(); 907 908 BEGIN_BATCH(80); 909 if (info->ChipFamily < CHIP_FAMILY_RV770) { 910 EREG(TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | 911 (28 << TD_FIFO_CREDIT_shift))); 912 EREG(VC_ENHANCE, 0); 913 EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 914 EREG(DB_DEBUG, 0x82000000); /* ? */ 915 EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 916 (16 << DEPTH_FLUSH_shift) | 917 (0 << FORCE_SUMMARIZE_shift) | 918 (4 << DEPTH_PENDING_FREE_shift) | 919 (16 << DEPTH_CACHELINE_FREE_shift) | 920 0)); 921 } else { 922 EREG(TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) | 923 (28 << TD_FIFO_CREDIT_shift))); 924 EREG(VC_ENHANCE, 0); 925 EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit); 926 EREG(DB_DEBUG, 0); 927 EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 928 (16 << DEPTH_FLUSH_shift) | 929 (0 << FORCE_SUMMARIZE_shift) | 930 (4 << DEPTH_PENDING_FREE_shift) | 931 (4 << DEPTH_CACHELINE_FREE_shift) | 932 0)); 933 } 934 935 PACK0(SQ_VTX_BASE_VTX_LOC, 2); 936 E32(0); 937 E32(0); 938 939 PACK0(SQ_ESGS_RING_ITEMSIZE, 9); 940 E32(0); // SQ_ESGS_RING_ITEMSIZE 941 E32(0); // SQ_GSVS_RING_ITEMSIZE 942 E32(0); // SQ_ESTMP_RING_ITEMSIZE 943 E32(0); // SQ_GSTMP_RING_ITEMSIZE 944 E32(0); // SQ_VSTMP_RING_ITEMSIZE 945 E32(0); // SQ_PSTMP_RING_ITEMSIZE 946 E32(0); // SQ_FBUF_RING_ITEMSIZE 947 E32(0); // SQ_REDUC_RING_ITEMSIZE 948 E32(0); // SQ_GS_VERT_ITEMSIZE 949 950 // DB 951 EREG(DB_DEPTH_CONTROL, 0); 952 PACK0(DB_RENDER_CONTROL, 2); 953 E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); 954 if (info->ChipFamily < CHIP_FAMILY_RV770) 955 E32(FORCE_SHADER_Z_ORDER_bit); 956 else 957 E32(0); 958 EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | 959 (2 << ALPHA_TO_MASK_OFFSET1_shift) | 960 (2 << ALPHA_TO_MASK_OFFSET2_shift) | 961 (2 << ALPHA_TO_MASK_OFFSET3_shift))); 962 EREG(DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ 963 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 964 965 PACK0(DB_STENCIL_CLEAR, 2); 966 E32(0); // DB_STENCIL_CLEAR 967 E32(0); // DB_DEPTH_CLEAR 968 969 PACK0(DB_STENCILREFMASK, 3); 970 E32(0); // DB_STENCILREFMASK 971 E32(0); // DB_STENCILREFMASK_BF 972 E32(0); // SX_ALPHA_REF 973 974 PACK0(CB_CLRCMP_CONTROL, 4); 975 E32(1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC 976 E32(0); // CB_CLRCMP_SRC 977 E32(0); // CB_CLRCMP_DST 978 E32(0); // CB_CLRCMP_MSK 979 980 EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask); 981 EREG(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); 982 983 PACK0(SX_ALPHA_TEST_CONTROL, 5); 984 E32(0); // SX_ALPHA_TEST_CONTROL 985 E32(0x00000000); // CB_BLEND_RED 986 E32(0x00000000); // CB_BLEND_GREEN 987 E32(0x00000000); // CB_BLEND_BLUE 988 E32(0x00000000); // CB_BLEND_ALPHA 989 990 EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | 991 (0 << WINDOW_Y_OFFSET_shift))); 992 993 if (info->ChipFamily < CHIP_FAMILY_RV770) 994 EREG(R7xx_PA_SC_EDGERULE, 0x00000000); 995 else 996 EREG(R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); 997 998 EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); 999 1000 END_BATCH(); 1001 1002 /* clip boolean is set to always visible -> doesn't matter */ 1003 for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) 1004 r600_set_clip_rect(pScrn, i, 0, 0, 8192, 8192); 1005 1006 for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) 1007 r600_set_vport_scissor(pScrn, i, 0, 0, 8192, 8192); 1008 1009 BEGIN_BATCH(49); 1010 PACK0(PA_SC_MPASS_PS_CNTL, 2); 1011 E32(0); 1012 if (info->ChipFamily < CHIP_FAMILY_RV770) 1013 E32((WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); 1014 else 1015 E32((FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | 1016 0x00500000)); /* ? */ 1017 1018 PACK0(PA_SC_LINE_CNTL, 9); 1019 E32(0); // PA_SC_LINE_CNTL 1020 E32(0); // PA_SC_AA_CONFIG 1021 E32(((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL 1022 (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */ 1023 EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ 1024 EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ 1025 EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ 1026 EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ 1027 E32(0); // PA_SC_AA_SAMPLE_LOCS_MCTX 1028 E32(0); // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M 1029 1030 EREG(PA_SC_AA_MASK, 0xFFFFFFFF); 1031 1032 PACK0(PA_CL_CLIP_CNTL, 5); 1033 E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL 1034 E32(FACE_bit); // PA_SU_SC_MODE_CNTL 1035 E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL 1036 E32(0); // PA_CL_VS_OUT_CNTL 1037 E32(0); // PA_CL_NANINF_CNTL 1038 1039 PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); 1040 E32(0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL 1041 E32(0); // PA_SU_POLY_OFFSET_CLAMP 1042 E32(0); // PA_SU_POLY_OFFSET_FRONT_SCALE 1043 E32(0); // PA_SU_POLY_OFFSET_FRONT_OFFSET 1044 E32(0); // PA_SU_POLY_OFFSET_BACK_SCALE 1045 E32(0); // PA_SU_POLY_OFFSET_BACK_OFFSET 1046 1047 // SPI 1048 if (info->ChipFamily < CHIP_FAMILY_RV770) 1049 EREG(R7xx_SPI_THREAD_GROUPING, 0); 1050 else 1051 EREG(R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift)); 1052 1053 /* default Interpolator setup */ 1054 EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | 1055 (1 << SEMANTIC_1_shift))); 1056 PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2); 1057 /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ 1058 E32(((0 << SEMANTIC_shift) | 1059 (0x01 << DEFAULT_VAL_shift) | 1060 SEL_CENTROID_bit)); 1061 /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ 1062 E32(((1 << SEMANTIC_shift) | 1063 (0x01 << DEFAULT_VAL_shift) | 1064 SEL_CENTROID_bit)); 1065 1066 PACK0(SPI_INPUT_Z, 4); 1067 E32(0); // SPI_INPUT_Z 1068 E32(0); // SPI_FOG_CNTL 1069 E32(0); // SPI_FOG_FUNC_SCALE 1070 E32(0); // SPI_FOG_FUNC_BIAS 1071 1072 END_BATCH(); 1073 1074 // clear FS 1075 fs_conf.bo = accel_state->shaders_bo; 1076 r600_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM); 1077 1078 // VGT 1079 BEGIN_BATCH(46); 1080 PACK0(VGT_MAX_VTX_INDX, 4); 1081 E32(0xffffff); // VGT_MAX_VTX_INDX 1082 E32(0); // VGT_MIN_VTX_INDX 1083 E32(0); // VGT_INDX_OFFSET 1084 E32(0); // VGT_MULTI_PRIM_IB_RESET_INDX 1085 1086 EREG(VGT_PRIMITIVEID_EN, 0); 1087 EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0); 1088 1089 PACK0(VGT_INSTANCE_STEP_RATE_0, 2); 1090 E32(0); // VGT_INSTANCE_STEP_RATE_0 1091 E32(0); // VGT_INSTANCE_STEP_RATE_1 1092 1093 PACK0(PA_SU_POINT_SIZE, 17); 1094 E32(0); // PA_SU_POINT_SIZE 1095 E32(0); // PA_SU_POINT_MINMAX 1096 E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL 1097 E32(0); // PA_SC_LINE_STIPPLE 1098 E32(0); // VGT_OUTPUT_PATH_CNTL 1099 E32(0); // VGT_HOS_CNTL 1100 E32(0); // VGT_HOS_MAX_TESS_LEVEL 1101 E32(0); // VGT_HOS_MIN_TESS_LEVEL 1102 E32(0); // VGT_HOS_REUSE_DEPTH 1103 E32(0); // VGT_GROUP_PRIM_TYPE 1104 E32(0); // VGT_GROUP_FIRST_DECR 1105 E32(0); // VGT_GROUP_DECR 1106 E32(0); // VGT_GROUP_VECT_0_CNTL 1107 E32(0); // VGT_GROUP_VECT_1_CNTL 1108 E32(0); // VGT_GROUP_VECT_0_FMT_CNTL 1109 E32(0); // VGT_GROUP_VECT_1_FMT_CNTL 1110 E32(0); // VGT_GS_MODE 1111 1112 PACK0(VGT_STRMOUT_EN, 3); 1113 E32(0); // VGT_STRMOUT_EN 1114 E32(0); // VGT_REUSE_OFF 1115 E32(0); // VGT_VTX_CNT_EN 1116 1117 EREG(VGT_STRMOUT_BUFFER_EN, 0); 1118 EREG(SX_MISC, 0); 1119 END_BATCH(); 1120} 1121 1122 1123/* 1124 * Commands 1125 */ 1126 1127void 1128r600_draw_immd(ScrnInfoPtr pScrn, draw_config_t *draw_conf, uint32_t *indices) 1129{ 1130 RADEONInfoPtr info = RADEONPTR(pScrn); 1131 uint32_t i, count; 1132 1133 // calculate num of packets 1134 count = 2; 1135 if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) 1136 count += (draw_conf->num_indices + 1) / 2; 1137 else 1138 count += draw_conf->num_indices; 1139 1140 BEGIN_BATCH(8 + count); 1141 EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1142 PACK3(IT_INDEX_TYPE, 1); 1143#if X_BYTE_ORDER == X_BIG_ENDIAN 1144 E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1145#else 1146 E32(draw_conf->index_type); 1147#endif 1148 PACK3(IT_NUM_INSTANCES, 1); 1149 E32(draw_conf->num_instances); 1150 1151 PACK3(IT_DRAW_INDEX_IMMD, count); 1152 E32(draw_conf->num_indices); 1153 E32(draw_conf->vgt_draw_initiator); 1154 1155 if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) { 1156 for (i = 0; i < draw_conf->num_indices; i += 2) { 1157 if ((i + 1) == draw_conf->num_indices) 1158 E32(indices[i]); 1159 else 1160 E32((indices[i] | (indices[i + 1] << 16))); 1161 } 1162 } else { 1163 for (i = 0; i < draw_conf->num_indices; i++) 1164 E32(indices[i]); 1165 } 1166 END_BATCH(); 1167} 1168 1169void 1170r600_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf) 1171{ 1172 RADEONInfoPtr info = RADEONPTR(pScrn); 1173 1174 BEGIN_BATCH(10); 1175 EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1176 PACK3(IT_INDEX_TYPE, 1); 1177#if X_BYTE_ORDER == X_BIG_ENDIAN 1178 E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1179#else 1180 E32(draw_conf->index_type); 1181#endif 1182 PACK3(IT_NUM_INSTANCES, 1); 1183 E32(draw_conf->num_instances); 1184 PACK3(IT_DRAW_INDEX_AUTO, 2); 1185 E32(draw_conf->num_indices); 1186 E32(draw_conf->vgt_draw_initiator); 1187 END_BATCH(); 1188} 1189 1190void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size) 1191{ 1192 RADEONInfoPtr info = RADEONPTR(pScrn); 1193 struct radeon_accel_state *accel_state = info->accel_state; 1194 draw_config_t draw_conf; 1195 vtx_resource_t vtx_res; 1196 1197 if (accel_state->vbo.vb_start_op == -1) 1198 return; 1199 1200 CLEAR (draw_conf); 1201 CLEAR (vtx_res); 1202 1203 if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) { 1204 R600IBDiscard(pScrn); 1205 return; 1206 } 1207 1208 /* Vertex buffer setup */ 1209 accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op; 1210 vtx_res.id = SQ_VTX_RESOURCE_vs; 1211 vtx_res.vtx_size_dw = vtx_size / 4; 1212 vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; 1213 vtx_res.mem_req_size = 1; 1214 vtx_res.vb_addr = accel_state->vbo.vb_start_op; 1215 vtx_res.bo = accel_state->vbo.vb_bo; 1216#if X_BYTE_ORDER == X_BIG_ENDIAN 1217 vtx_res.endian = SQ_ENDIAN_8IN32; 1218#endif 1219 r600_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT); 1220 1221 /* Draw */ 1222 draw_conf.prim_type = DI_PT_RECTLIST; 1223 draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 1224 draw_conf.num_instances = 1; 1225 draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 1226 draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 1227 1228 r600_draw_auto(pScrn, &draw_conf); 1229 1230 /* XXX drm should handle this in fence submit */ 1231 r600_wait_3d_idle_clean(pScrn); 1232 1233 /* sync dst surface */ 1234 r600_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 1235 accel_state->dst_size, 0, 1236 accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); 1237 1238 accel_state->vbo.vb_start_op = -1; 1239 accel_state->ib_reset_op = 0; 1240 1241} 1242 1243