/* r6xx_accel.c revision b7e1c893 */
1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Authors: Alex Deucher <alexander.deucher@amd.com> 24 * Matthias Hopf <mhopf@suse.de> 25 */ 26#ifdef HAVE_CONFIG_H 27#include "config.h" 28#endif 29 30#include "xf86.h" 31 32#include <errno.h> 33 34#include "radeon.h" 35#include "r600_shader.h" 36#include "radeon_reg.h" 37#include "r600_reg.h" 38#include "r600_state.h" 39 40#include "radeon_drm.h" 41 42/* Flush the indirect buffer to the kernel for submission to the card */ 43void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) 44{ 45 RADEONInfoPtr info = RADEONPTR(pScrn); 46 drmBufPtr buffer = ib; 47 int start = 0; 48 drm_radeon_indirect_t indirect; 49 50 if (!buffer) return; 51 52 //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", 53 // buffer->idx); 54 55 while (buffer->used & 0x3c){ 56 E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */ 57 } 58 59 //ErrorF("buffer bytes: %d\n", buffer->used); 60 61 indirect.idx = buffer->idx; 62 indirect.start = start; 63 indirect.end = buffer->used; 64 indirect.discard = 1; 65 66 drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, 67 &indirect, sizeof(drm_radeon_indirect_t)); 68 69} 70 71void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) 72{ 73 if (!ib) return; 74 75 ib->used = 0; 76 R600CPFlushIndirect(pScrn, ib); 77} 78 79void 80wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) 81{ 82 83 //flush caches, don't generate timestamp 84 PACK3(ib, IT_EVENT_WRITE, 1); 85 E32(ib, CACHE_FLUSH_AND_INV_EVENT); 86 // wait for 3D idle clean 87 EREG(ib, WAIT_UNTIL, (WAIT_3D_IDLE_bit | 88 WAIT_3D_IDLECLEAN_bit)); 89} 90 91void 92wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) 93{ 94 95 EREG(ib, WAIT_UNTIL, WAIT_3D_IDLE_bit); 96 97} 98 99static void 100reset_cb(ScrnInfoPtr pScrn, drmBufPtr ib) 101{ 102 int i; 103 104 PACK0(ib, CB_COLOR0_INFO, 8); 105 for (i = 0; i < 8; i++) 106 E32(ib, 0); 107} 108 109static void 110reset_td_samplers(ScrnInfoPtr pScrn, drmBufPtr ib) 111{ 112 int i; 113 114 wait_3d_idle(pScrn, ib); 115 116 PACK0(ib, 
TD_PS_SAMPLER0_BORDER_RED, 4*TD_PS_SAMPLER0_BORDER_RED_num); 117 for (i = 0; i < 4*TD_PS_SAMPLER0_BORDER_RED_num; i++) 118 E32(ib, 0); 119 PACK0(ib, TD_VS_SAMPLER0_BORDER_RED, 4*TD_VS_SAMPLER0_BORDER_RED_num); 120 for (i = 0; i < 4*TD_VS_SAMPLER0_BORDER_RED_num; i++) 121 E32(ib, 0); 122 123 wait_3d_idle(pScrn, ib); 124} 125 126static void 127reset_sampler_const (ScrnInfoPtr pScrn, drmBufPtr ib) 128{ 129 int i; 130 131 for (i = 0; i < SQ_TEX_SAMPLER_WORD_all_num; i++) { 132 PACK0(ib, SQ_TEX_SAMPLER_WORD + i * SQ_TEX_SAMPLER_WORD_offset, 3); 133 E32(ib, SQ_TEX_DEPTH_COMPARE_LESSEQUAL << DEPTH_COMPARE_FUNCTION_shift); 134 E32(ib, MAX_LOD_mask); 135 E32(ib, 0); 136 } 137} 138 139static void 140reset_dx9_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib) 141{ 142 int i; 143 144 const int count = SQ_ALU_CONSTANT_all_num * (SQ_ALU_CONSTANT_offset >> 2); 145 146 PACK0(ib, SQ_ALU_CONSTANT, count); 147 for (i = 0; i < count; i++) 148 EFLOAT(ib, 0.0); 149} 150 151static void 152reset_bool_loop_const(ScrnInfoPtr pScrn, drmBufPtr ib) 153{ 154 int i; 155 156 157 PACK0(ib, SQ_BOOL_CONST, SQ_BOOL_CONST_all_num); 158 for (i = 0; i < SQ_BOOL_CONST_all_num; i++) 159 E32(ib, 0); 160 161 PACK0(ib, SQ_LOOP_CONST, SQ_LOOP_CONST_all_num); 162 163 for (i = 0; i < SQ_LOOP_CONST_all_num; i++) 164 E32(ib, 0); 165 166} 167 168void 169start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) 170{ 171 RADEONInfoPtr info = RADEONPTR(pScrn); 172 173 if (info->ChipFamily < CHIP_FAMILY_RV770) { 174 PACK3(ib, IT_START_3D_CMDBUF, 1); 175 E32(ib, 0); 176 } 177 178 PACK3(ib, IT_CONTEXT_CONTROL, 2); 179 E32(ib, 0x80000000); 180 E32(ib, 0x80000000); 181 182 wait_3d_idle_clean (pScrn, ib); 183} 184 185/* 186 * Setup of functional groups 187 */ 188 189// asic stack/thread/gpr limits - need to query the drm 190static void 191sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) 192{ 193 uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 194 uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, 
sq_stack_resource_mgmt_2; 195 RADEONInfoPtr info = RADEONPTR(pScrn); 196 197 if ((info->ChipFamily == CHIP_FAMILY_RV610) || 198 (info->ChipFamily == CHIP_FAMILY_RV620) || 199 (info->ChipFamily == CHIP_FAMILY_RS780) || 200 (info->ChipFamily == CHIP_FAMILY_RV710)) 201 sq_config = 0; // no VC 202 else 203 sq_config = VC_ENABLE_bit; 204 205 sq_config |= (DX9_CONSTS_bit | 206 ALU_INST_PREFER_VECTOR_bit | 207 (sq_conf->ps_prio << PS_PRIO_shift) | 208 (sq_conf->vs_prio << VS_PRIO_shift) | 209 (sq_conf->gs_prio << GS_PRIO_shift) | 210 (sq_conf->es_prio << ES_PRIO_shift)); 211 212 sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 213 (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 214 (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 215 sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 216 (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 217 218 sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 219 (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 220 (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 221 (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 222 223 sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 224 (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 225 226 sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 227 (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 228 229 PACK0(ib, SQ_CONFIG, 6); 230 E32(ib, sq_config); 231 E32(ib, sq_gpr_resource_mgmt_1); 232 E32(ib, sq_gpr_resource_mgmt_2); 233 E32(ib, sq_thread_resource_mgmt); 234 E32(ib, sq_stack_resource_mgmt_1); 235 E32(ib, sq_stack_resource_mgmt_2); 236 237} 238 239void 240set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) 241{ 242 uint32_t cb_color_info; 243 int pitch, slice, h; 244 RADEONInfoPtr info = RADEONPTR(pScrn); 245 246 cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 247 
(cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 248 (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 249 (cb_conf->number_type << NUMBER_TYPE_shift) | 250 (cb_conf->comp_swap << COMP_SWAP_shift) | 251 (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); 252 if (cb_conf->read_size) 253 cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; 254 if (cb_conf->blend_clamp) 255 cb_color_info |= BLEND_CLAMP_bit; 256 if (cb_conf->clear_color) 257 cb_color_info |= CLEAR_COLOR_bit; 258 if (cb_conf->blend_bypass) 259 cb_color_info |= BLEND_BYPASS_bit; 260 if (cb_conf->blend_float32) 261 cb_color_info |= BLEND_FLOAT32_bit; 262 if (cb_conf->simple_float) 263 cb_color_info |= SIMPLE_FLOAT_bit; 264 if (cb_conf->round_mode) 265 cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 266 if (cb_conf->tile_compact) 267 cb_color_info |= TILE_COMPACT_bit; 268 if (cb_conf->source_format) 269 cb_color_info |= SOURCE_FORMAT_bit; 270 271 pitch = (cb_conf->w / 8) - 1; 272 h = (cb_conf->h + 7) & ~7; 273 slice = ((cb_conf->w * h) / 64) - 1; 274 275 EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); 276 277 // rv6xx workaround 278 if ((info->ChipFamily > CHIP_FAMILY_R600) && 279 (info->ChipFamily < CHIP_FAMILY_RV770)) { 280 PACK3(ib, IT_SURFACE_BASE_UPDATE, 1); 281 E32(ib, (2 << cb_conf->id)); 282 } 283 284 // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib 285 EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | 286 (slice << SLICE_TILE_MAX_shift))); 287 EREG(ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | 288 (0 << SLICE_MAX_shift))); 289 EREG(ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); 290 EREG(ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 291 EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 292 EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | 293 (0 << 
FMASK_TILE_MAX_shift))); 294} 295 296void 297cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr) 298{ 299 uint32_t cp_coher_size; 300 if (size == 0xffffffff) 301 cp_coher_size = 0xffffffff; 302 else 303 cp_coher_size = ((size + 255) >> 8); 304 305 PACK3(ib, IT_SURFACE_SYNC, 4); 306 E32(ib, sync_type); 307 E32(ib, cp_coher_size); 308 E32(ib, (mc_addr >> 8)); 309 E32(ib, 10); /* poll interval */ 310} 311 312/* inserts a wait for vline in the command stream */ 313void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, 314 int crtc, int start, int stop) 315{ 316 RADEONInfoPtr info = RADEONPTR(pScrn); 317 xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn); 318 uint32_t offset; 319 RADEONCrtcPrivatePtr radeon_crtc; 320 321 if ((crtc < 0) || (crtc > 1)) 322 return; 323 324 if (stop < start) 325 return; 326 327 if (!xf86_config->crtc[crtc]->enabled) 328 return; 329 330#ifdef USE_EXA 331 if (info->useEXA) 332 offset = exaGetPixmapOffset(pPix); 333 else 334#endif 335 offset = pPix->devPrivate.ptr - info->FB; 336 337 /* if drawing to front buffer */ 338 if (offset != 0) 339 return; 340 341 start = max(start, 0); 342 stop = min(stop, xf86_config->crtc[crtc]->mode.VDisplay); 343 344 if (start > xf86_config->crtc[crtc]->mode.VDisplay) 345 return; 346 347 radeon_crtc = xf86_config->crtc[crtc]->driver_private; 348 349 /* set the VLINE range */ 350 EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset, 351 (start << AVIVO_D1MODE_VLINE_START_SHIFT) | 352 (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); 353 354 /* tell the CP to poll the VLINE state register */ 355 PACK3(ib, IT_WAIT_REG_MEM, 6); 356 E32(ib, IT_WAIT_REG | IT_WAIT_EQ); 357 E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset)); 358 E32(ib, 0); 359 E32(ib, 0); // Ref value 360 E32(ib, AVIVO_D1MODE_VLINE_STAT); // Mask 361 E32(ib, 10); // Wait interval 362} 363 364void 365fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, 
shader_config_t *fs_conf) 366{ 367 uint32_t sq_pgm_resources; 368 369 sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 370 (fs_conf->stack_size << STACK_SIZE_shift)); 371 372 if (fs_conf->dx10_clamp) 373 sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; 374 375 EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 376 EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources); 377 EREG(ib, SQ_PGM_CF_OFFSET_FS, 0); 378} 379 380void 381vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) 382{ 383 uint32_t sq_pgm_resources; 384 385 sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 386 (vs_conf->stack_size << STACK_SIZE_shift)); 387 388 if (vs_conf->dx10_clamp) 389 sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; 390 if (vs_conf->fetch_cache_lines) 391 sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 392 if (vs_conf->uncached_first_inst) 393 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 394 395 EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 396 EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources); 397 EREG(ib, SQ_PGM_CF_OFFSET_VS, 0); 398} 399 400void 401ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) 402{ 403 uint32_t sq_pgm_resources; 404 405 sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 406 (ps_conf->stack_size << STACK_SIZE_shift)); 407 408 if (ps_conf->dx10_clamp) 409 sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; 410 if (ps_conf->fetch_cache_lines) 411 sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 412 if (ps_conf->uncached_first_inst) 413 sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 414 if (ps_conf->clamp_consts) 415 sq_pgm_resources |= CLAMP_CONSTS_bit; 416 417 EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 418 EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources); 419 EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode); 420 EREG(ib, SQ_PGM_CF_OFFSET_PS, 0); 421} 422 423void 424set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr 
ib, int offset, int count, float *const_buf) 425{ 426 int i; 427 const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); 428 429 PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); 430 for (i = 0; i < countreg; i++) 431 EFLOAT(ib, const_buf[i]); 432} 433 434void 435set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) 436{ 437 /* bool register order is: ps, vs, gs; one register each 438 * 1 bits per bool; 32 bools each for ps, vs, gs. 439 */ 440 EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 441} 442 443void 444set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) 445{ 446 uint32_t sq_vtx_constant_word2; 447 448 sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 449 ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 450 (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 451 (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 452 (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 453 if (res->clamp_x) 454 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 455 456 if (res->format_comp_all) 457 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 458 459 if (res->srf_mode_all) 460 sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 461 462 PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); 463 E32(ib, res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 464 E32(ib, (res->vtx_num_entries << 2) - 1); // 1: SIZE 465 E32(ib, sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 466 E32(ib, res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? 
467 E32(ib, 0); // 4: n/a 468 E32(ib, 0); // 5: n/a 469 E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE 470} 471 472void 473set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) 474{ 475 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 476 uint32_t sq_tex_resource_word5, sq_tex_resource_word6; 477 478 sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | 479 (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); 480 481 if (tex_res->w) 482 sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) | 483 ((tex_res->w - 1) << TEX_WIDTH_shift)); 484 485 if (tex_res->tile_type) 486 sq_tex_resource_word0 |= TILE_TYPE_bit; 487 488 sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); 489 490 if (tex_res->h) 491 sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 492 if (tex_res->depth) 493 sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 494 495 sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 496 (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 497 (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 498 (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 499 (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 500 (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 501 (tex_res->request_size << REQUEST_SIZE_shift) | 502 (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 503 (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 504 (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 505 (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 506 (tex_res->base_level << BASE_LEVEL_shift)); 507 508 if (tex_res->srf_mode_all) 509 sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 510 if (tex_res->force_degamma) 511 sq_tex_resource_word4 |= 
SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 512 513 sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 514 (tex_res->base_array << BASE_ARRAY_shift) | 515 (tex_res->last_array << LAST_ARRAY_shift)); 516 517 sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | 518 (tex_res->perf_modulation << PERF_MODULATION_shift) | 519 (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); 520 521 if (tex_res->interlaced) 522 sq_tex_resource_word6 |= INTERLACED_bit; 523 524 PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); 525 E32(ib, sq_tex_resource_word0); 526 E32(ib, sq_tex_resource_word1); 527 E32(ib, ((tex_res->base) >> 8)); 528 E32(ib, ((tex_res->mip_base) >> 8)); 529 E32(ib, sq_tex_resource_word4); 530 E32(ib, sq_tex_resource_word5); 531 E32(ib, sq_tex_resource_word6); 532} 533 534void 535set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) 536{ 537 uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 538 539 sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 540 (s->clamp_y << CLAMP_Y_shift) | 541 (s->clamp_z << CLAMP_Z_shift) | 542 (s->xy_mag_filter << XY_MAG_FILTER_shift) | 543 (s->xy_min_filter << XY_MIN_FILTER_shift) | 544 (s->z_filter << Z_FILTER_shift) | 545 (s->mip_filter << MIP_FILTER_shift) | 546 (s->border_color << BORDER_COLOR_TYPE_shift) | 547 (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 548 (s->chroma_key << CHROMA_KEY_shift)); 549 if (s->point_sampling_clamp) 550 sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; 551 if (s->tex_array_override) 552 sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; 553 if (s->lod_uses_minor_axis) 554 sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; 555 556 sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | 557 (s->max_lod << MAX_LOD_shift) | 558 (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); 559 560 sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | 561 
(s->perf_mip << PERF_MIP_shift) | 562 (s->perf_z << PERF_Z_shift)); 563 if (s->mc_coord_truncate) 564 sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 565 if (s->force_degamma) 566 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 567 if (s->high_precision_filter) 568 sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; 569 if (s->fetch_4) 570 sq_tex_sampler_word2 |= FETCH_4_bit; 571 if (s->sample_is_pcf) 572 sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; 573 if (s->type) 574 sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 575 576 PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); 577 E32(ib, sq_tex_sampler_word0); 578 E32(ib, sq_tex_sampler_word1); 579 E32(ib, sq_tex_sampler_word2); 580} 581 582//XXX deal with clip offsets in clip setup 583void 584set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) 585{ 586 587 EREG(ib, PA_SC_SCREEN_SCISSOR_TL, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 588 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 589 EREG(ib, PA_SC_SCREEN_SCISSOR_BR, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | 590 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 591} 592 593void 594set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) 595{ 596 597 EREG(ib, PA_SC_VPORT_SCISSOR_0_TL + 598 id * PA_SC_VPORT_SCISSOR_0_TL_offset, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 599 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 600 WINDOW_OFFSET_DISABLE_bit)); 601 EREG(ib, PA_SC_VPORT_SCISSOR_0_BR + 602 id * PA_SC_VPORT_SCISSOR_0_BR_offset, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 603 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 604} 605 606void 607set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) 608{ 609 610 EREG(ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 611 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 612 WINDOW_OFFSET_DISABLE_bit)); 613 EREG(ib, 
PA_SC_GENERIC_SCISSOR_BR, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 614 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 615} 616 617void 618set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) 619{ 620 621 EREG(ib, PA_SC_WINDOW_SCISSOR_TL, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 622 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 623 WINDOW_OFFSET_DISABLE_bit)); 624 EREG(ib, PA_SC_WINDOW_SCISSOR_BR, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 625 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 626} 627 628void 629set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) 630{ 631 632 EREG(ib, PA_SC_CLIPRECT_0_TL + 633 id * PA_SC_CLIPRECT_0_TL_offset, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 634 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 635 EREG(ib, PA_SC_CLIPRECT_0_BR + 636 id * PA_SC_CLIPRECT_0_BR_offset, ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | 637 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 638} 639 640/* 641 * Setup of default state 642 */ 643 644void 645set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) 646{ 647 tex_resource_t tex_res; 648 shader_config_t fs_conf; 649 sq_config_t sq_conf; 650 int i; 651 RADEONInfoPtr info = RADEONPTR(pScrn); 652 struct radeon_accel_state *accel_state = info->accel_state; 653 654 memset(&tex_res, 0, sizeof(tex_resource_t)); 655 memset(&fs_conf, 0, sizeof(shader_config_t)); 656 657#if 1 658 if (accel_state->XInited3D) 659 return; 660#endif 661 662 accel_state->XInited3D = TRUE; 663 664 wait_3d_idle(pScrn, ib); 665 666 // ASIC specific setup, see drm 667 if (info->ChipFamily < CHIP_FAMILY_RV770) { 668 EREG(ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | 669 (28 << TD_FIFO_CREDIT_shift))); 670 EREG(ib, VC_ENHANCE, 0); 671 EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 672 EREG(ib, DB_DEBUG, 0x82000000); /* ? 
*/ 673 EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 674 (16 << DEPTH_FLUSH_shift) | 675 (0 << FORCE_SUMMARIZE_shift) | 676 (4 << DEPTH_PENDING_FREE_shift) | 677 (16 << DEPTH_CACHELINE_FREE_shift) | 678 0)); 679 } else { 680 EREG(ib, TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) | 681 (28 << TD_FIFO_CREDIT_shift))); 682 EREG(ib, VC_ENHANCE, 0); 683 EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit); 684 EREG(ib, DB_DEBUG, 0); 685 EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 686 (16 << DEPTH_FLUSH_shift) | 687 (0 << FORCE_SUMMARIZE_shift) | 688 (4 << DEPTH_PENDING_FREE_shift) | 689 (4 << DEPTH_CACHELINE_FREE_shift) | 690 0)); 691 } 692 693 reset_td_samplers(pScrn, ib); 694 reset_dx9_alu_consts(pScrn, ib); 695 reset_bool_loop_const (pScrn, ib); 696 reset_sampler_const (pScrn, ib); 697 698 // SQ 699 sq_conf.ps_prio = 0; 700 sq_conf.vs_prio = 1; 701 sq_conf.gs_prio = 2; 702 sq_conf.es_prio = 3; 703 // need to set stack/thread/gpr limits based on the asic 704 // for now just set them low enough so any card will work 705 // see r600_cp.c in the drm 706 switch (info->ChipFamily) { 707 case CHIP_FAMILY_R600: 708 sq_conf.num_ps_gprs = 192; 709 sq_conf.num_vs_gprs = 56; 710 sq_conf.num_temp_gprs = 4; 711 sq_conf.num_gs_gprs = 0; 712 sq_conf.num_es_gprs = 0; 713 sq_conf.num_ps_threads = 136; 714 sq_conf.num_vs_threads = 48; 715 sq_conf.num_gs_threads = 4; 716 sq_conf.num_es_threads = 4; 717 sq_conf.num_ps_stack_entries = 128; 718 sq_conf.num_vs_stack_entries = 128; 719 sq_conf.num_gs_stack_entries = 0; 720 sq_conf.num_es_stack_entries = 0; 721 break; 722 case CHIP_FAMILY_RV630: 723 case CHIP_FAMILY_RV635: 724 sq_conf.num_ps_gprs = 84; 725 sq_conf.num_vs_gprs = 36; 726 sq_conf.num_temp_gprs = 4; 727 sq_conf.num_gs_gprs = 0; 728 sq_conf.num_es_gprs = 0; 729 sq_conf.num_ps_threads = 144; 730 sq_conf.num_vs_threads = 40; 731 sq_conf.num_gs_threads = 4; 732 sq_conf.num_es_threads = 4; 733 sq_conf.num_ps_stack_entries = 40; 734 sq_conf.num_vs_stack_entries 
= 40; 735 sq_conf.num_gs_stack_entries = 32; 736 sq_conf.num_es_stack_entries = 16; 737 break; 738 case CHIP_FAMILY_RV610: 739 case CHIP_FAMILY_RV620: 740 case CHIP_FAMILY_RS780: 741 default: 742 sq_conf.num_ps_gprs = 84; 743 sq_conf.num_vs_gprs = 36; 744 sq_conf.num_temp_gprs = 4; 745 sq_conf.num_gs_gprs = 0; 746 sq_conf.num_es_gprs = 0; 747 sq_conf.num_ps_threads = 136; 748 sq_conf.num_vs_threads = 48; 749 sq_conf.num_gs_threads = 4; 750 sq_conf.num_es_threads = 4; 751 sq_conf.num_ps_stack_entries = 40; 752 sq_conf.num_vs_stack_entries = 40; 753 sq_conf.num_gs_stack_entries = 32; 754 sq_conf.num_es_stack_entries = 16; 755 break; 756 case CHIP_FAMILY_RV670: 757 sq_conf.num_ps_gprs = 144; 758 sq_conf.num_vs_gprs = 40; 759 sq_conf.num_temp_gprs = 4; 760 sq_conf.num_gs_gprs = 0; 761 sq_conf.num_es_gprs = 0; 762 sq_conf.num_ps_threads = 136; 763 sq_conf.num_vs_threads = 48; 764 sq_conf.num_gs_threads = 4; 765 sq_conf.num_es_threads = 4; 766 sq_conf.num_ps_stack_entries = 40; 767 sq_conf.num_vs_stack_entries = 40; 768 sq_conf.num_gs_stack_entries = 32; 769 sq_conf.num_es_stack_entries = 16; 770 break; 771 case CHIP_FAMILY_RV770: 772 sq_conf.num_ps_gprs = 192; 773 sq_conf.num_vs_gprs = 56; 774 sq_conf.num_temp_gprs = 4; 775 sq_conf.num_gs_gprs = 0; 776 sq_conf.num_es_gprs = 0; 777 sq_conf.num_ps_threads = 188; 778 sq_conf.num_vs_threads = 60; 779 sq_conf.num_gs_threads = 0; 780 sq_conf.num_es_threads = 0; 781 sq_conf.num_ps_stack_entries = 256; 782 sq_conf.num_vs_stack_entries = 256; 783 sq_conf.num_gs_stack_entries = 0; 784 sq_conf.num_es_stack_entries = 0; 785 break; 786 case CHIP_FAMILY_RV730: 787 sq_conf.num_ps_gprs = 84; 788 sq_conf.num_vs_gprs = 36; 789 sq_conf.num_temp_gprs = 4; 790 sq_conf.num_gs_gprs = 0; 791 sq_conf.num_es_gprs = 0; 792 sq_conf.num_ps_threads = 188; 793 sq_conf.num_vs_threads = 60; 794 sq_conf.num_gs_threads = 0; 795 sq_conf.num_es_threads = 0; 796 sq_conf.num_ps_stack_entries = 128; 797 sq_conf.num_vs_stack_entries = 128; 798 
sq_conf.num_gs_stack_entries = 0; 799 sq_conf.num_es_stack_entries = 0; 800 break; 801 case CHIP_FAMILY_RV710: 802 sq_conf.num_ps_gprs = 192; 803 sq_conf.num_vs_gprs = 56; 804 sq_conf.num_temp_gprs = 4; 805 sq_conf.num_gs_gprs = 0; 806 sq_conf.num_es_gprs = 0; 807 sq_conf.num_ps_threads = 144; 808 sq_conf.num_vs_threads = 48; 809 sq_conf.num_gs_threads = 0; 810 sq_conf.num_es_threads = 0; 811 sq_conf.num_ps_stack_entries = 128; 812 sq_conf.num_vs_stack_entries = 128; 813 sq_conf.num_gs_stack_entries = 0; 814 sq_conf.num_es_stack_entries = 0; 815 break; 816 } 817 818 sq_setup(pScrn, ib, &sq_conf); 819 820 EREG(ib, SQ_VTX_BASE_VTX_LOC, 0); 821 EREG(ib, SQ_VTX_START_INST_LOC, 0); 822 823 PACK0(ib, SQ_ESGS_RING_ITEMSIZE, 9); 824 E32(ib, 0); // SQ_ESGS_RING_ITEMSIZE 825 E32(ib, 0); // SQ_GSVS_RING_ITEMSIZE 826 E32(ib, 0); // SQ_ESTMP_RING_ITEMSIZE 827 E32(ib, 0); // SQ_GSTMP_RING_ITEMSIZE 828 E32(ib, 0); // SQ_VSTMP_RING_ITEMSIZE 829 E32(ib, 0); // SQ_PSTMP_RING_ITEMSIZE 830 E32(ib, 0); // SQ_FBUF_RING_ITEMSIZE 831 E32(ib, 0); // SQ_REDUC_RING_ITEMSIZE 832 E32(ib, 0); // SQ_GS_VERT_ITEMSIZE 833 834 // DB 835 EREG(ib, DB_DEPTH_INFO, 0); 836 EREG(ib, DB_STENCIL_CLEAR, 0); 837 EREG(ib, DB_DEPTH_CLEAR, 0); 838 EREG(ib, DB_STENCILREFMASK, 0); 839 EREG(ib, DB_STENCILREFMASK_BF, 0); 840 EREG(ib, DB_DEPTH_CONTROL, 0); 841 EREG(ib, DB_RENDER_CONTROL, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); 842 if (info->ChipFamily < CHIP_FAMILY_RV770) 843 EREG(ib, DB_RENDER_OVERRIDE, FORCE_SHADER_Z_ORDER_bit); 844 else 845 EREG(ib, DB_RENDER_OVERRIDE, 0); 846 EREG(ib, DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | 847 (2 << ALPHA_TO_MASK_OFFSET1_shift) | 848 (2 << ALPHA_TO_MASK_OFFSET2_shift) | 849 (2 << ALPHA_TO_MASK_OFFSET3_shift))); 850 851 // SX 852 EREG(ib, SX_ALPHA_TEST_CONTROL, 0); 853 EREG(ib, SX_ALPHA_REF, 0); 854 855 // CB 856 reset_cb(pScrn, ib); 857 858 PACK0(ib, CB_BLEND_RED, 4); 859 E32(ib, 0x00000000); 860 E32(ib, 0x00000000); 861 E32(ib, 0x00000000); 
862 E32(ib, 0x00000000); 863 864 /* CB_COLOR_CONTROL.PER_MRT_BLEND is off */ 865 // RV6xx+ have per-MRT blend 866 if (info->ChipFamily > CHIP_FAMILY_R600) { 867 PACK0(ib, CB_BLEND0_CONTROL, CB_BLEND0_CONTROL_num); 868 for (i = 0; i < CB_BLEND0_CONTROL_num; i++) 869 E32(ib, 0); 870 } 871 872 EREG(ib, CB_BLEND_CONTROL, 0); 873 874 if (info->ChipFamily < CHIP_FAMILY_RV770) { 875 PACK0(ib, CB_FOG_RED, 3); 876 E32(ib, 0x00000000); 877 E32(ib, 0x00000000); 878 E32(ib, 0x00000000); 879 } 880 881 EREG(ib, CB_COLOR_CONTROL, 0); 882 PACK0(ib, CB_CLRCMP_CONTROL, 4); 883 E32(ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC 884 E32(ib, 0); // CB_CLRCMP_SRC 885 E32(ib, 0); // CB_CLRCMP_DST 886 E32(ib, 0); // CB_CLRCMP_MSK 887 888 889 if (info->ChipFamily < CHIP_FAMILY_RV770) { 890 PACK0(ib, CB_CLEAR_RED, 4); 891 EFLOAT(ib, 1.0); /* WTF? */ 892 EFLOAT(ib, 0.0); 893 EFLOAT(ib, 1.0); 894 EFLOAT(ib, 1.0); 895 } 896 EREG(ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift)); 897 898 // SC 899 set_generic_scissor(pScrn, ib, 0, 0, 8192, 8192); 900 set_screen_scissor(pScrn, ib, 0, 0, 8192, 8192); 901 EREG(ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | 902 (0 << WINDOW_Y_OFFSET_shift))); 903 set_window_scissor(pScrn, ib, 0, 0, 8192, 8192); 904 905 EREG(ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); 906 907 /* clip boolean is set to always visible -> doesn't matter */ 908 for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) 909 set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192); 910 911 if (info->ChipFamily < CHIP_FAMILY_RV770) 912 EREG(ib, R7xx_PA_SC_EDGERULE, 0x00000000); 913 else 914 EREG(ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); 915 916 for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) { 917 set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192); 918 PACK0(ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2); 919 EFLOAT(ib, 0.0); 920 EFLOAT(ib, 1.0); 921 } 922 923 if (info->ChipFamily < CHIP_FAMILY_RV770) 924 EREG(ib, PA_SC_MODE_CNTL, (WALK_ORDER_ENABLE_bit | 
FORCE_EOV_CNTDWN_ENABLE_bit)); 925 else 926 EREG(ib, PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | 927 0x00500000)); /* ? */ 928 929 EREG(ib, PA_SC_LINE_CNTL, 0); 930 EREG(ib, PA_SC_AA_CONFIG, 0); 931 EREG(ib, PA_SC_AA_MASK, 0xFFFFFFFF); 932 933 //XXX: double check this 934 if (info->ChipFamily > CHIP_FAMILY_R600) { 935 EREG(ib, PA_SC_AA_SAMPLE_LOCS_MCTX, 0); 936 EREG(ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M, 0); 937 } 938 939 EREG(ib, PA_SC_LINE_STIPPLE, 0); 940 EREG(ib, PA_SC_MPASS_PS_CNTL, 0); 941 942 // CL 943 PACK0(ib, PA_CL_VPORT_XSCALE_0, 6); 944 EFLOAT(ib, 0.0f); // PA_CL_VPORT_XSCALE 945 EFLOAT(ib, 0.0f); // PA_CL_VPORT_XOFFSET 946 EFLOAT(ib, 0.0f); // PA_CL_VPORT_YSCALE 947 EFLOAT(ib, 0.0f); // PA_CL_VPORT_YOFFSET 948 EFLOAT(ib, 0.0f); // PA_CL_VPORT_ZSCALE 949 EFLOAT(ib, 0.0f); // PA_CL_VPORT_ZOFFSET 950 EREG(ib, PA_CL_CLIP_CNTL, (CLIP_DISABLE_bit | DX_CLIP_SPACE_DEF_bit)); 951 EREG(ib, PA_CL_VTE_CNTL, 0); 952 EREG(ib, PA_CL_VS_OUT_CNTL, 0); 953 EREG(ib, PA_CL_NANINF_CNTL, 0); 954 PACK0(ib, PA_CL_GB_VERT_CLIP_ADJ, 4); 955 EFLOAT(ib, 1.0); // PA_CL_GB_VERT_CLIP_ADJ 956 EFLOAT(ib, 1.0); // PA_CL_GB_VERT_DISC_ADJ 957 EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ 958 EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ 959 960 /* user clipping planes are disabled by default */ 961 PACK0(ib, PA_CL_UCP_0_X, 24); 962 for (i = 0; i < 24; i++) 963 EFLOAT(ib, 0.0); 964 965 // SU 966 EREG(ib, PA_SU_SC_MODE_CNTL, FACE_bit); 967 EREG(ib, PA_SU_POINT_SIZE, 0); 968 EREG(ib, PA_SU_POINT_MINMAX, 0); 969 EREG(ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0); 970 EREG(ib, PA_SU_POLY_OFFSET_BACK_SCALE, 0); 971 EREG(ib, PA_SU_POLY_OFFSET_FRONT_SCALE, 0); 972 EREG(ib, PA_SU_POLY_OFFSET_BACK_OFFSET, 0); 973 EREG(ib, PA_SU_POLY_OFFSET_FRONT_OFFSET, 0); 974 975 EREG(ib, PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ 976 EREG(ib, PA_SU_VTX_CNTL, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | 977 (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 
1/256 */ 978 EREG(ib, PA_SU_POLY_OFFSET_CLAMP, 0); 979 980 // SPI 981 if (info->ChipFamily < CHIP_FAMILY_RV770) 982 EREG(ib, R7xx_SPI_THREAD_GROUPING, 0); 983 else 984 EREG(ib, R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift)); 985 986 EREG(ib, SPI_INTERP_CONTROL_0, ((2 << PNT_SPRITE_OVRD_X_shift) | 987 (3 << PNT_SPRITE_OVRD_Y_shift) | 988 (0 << PNT_SPRITE_OVRD_Z_shift) | 989 (1 << PNT_SPRITE_OVRD_W_shift))); /* s,t,0,1 */ 990 EREG(ib, SPI_INPUT_Z, 0); 991 EREG(ib, SPI_FOG_CNTL, 0); 992 EREG(ib, SPI_FOG_FUNC_SCALE, 0); 993 EREG(ib, SPI_FOG_FUNC_BIAS, 0); 994 995 PACK0(ib, SPI_VS_OUT_ID_0, SPI_VS_OUT_ID_0_num); 996 for (i = 0; i < SPI_VS_OUT_ID_0_num; i++) /* identity mapping */ 997 E32(ib, 0x03020100 + i*0x04040404); 998 EREG(ib, SPI_VS_OUT_CONFIG, 0); 999 1000 // clear FS 1001 fs_setup(pScrn, ib, &fs_conf); 1002 1003 // VGT 1004 EREG(ib, VGT_MAX_VTX_INDX, 2048); /* XXX set to a reasonably large number of indices */ 1005 EREG(ib, VGT_MIN_VTX_INDX, 0); 1006 EREG(ib, VGT_INDX_OFFSET, 0); 1007 EREG(ib, VGT_INSTANCE_STEP_RATE_0, 0); 1008 EREG(ib, VGT_INSTANCE_STEP_RATE_1, 0); 1009 1010 EREG(ib, VGT_MULTI_PRIM_IB_RESET_INDX, 0); 1011 EREG(ib, VGT_OUTPUT_PATH_CNTL, 0); 1012 EREG(ib, VGT_GS_MODE, 0); 1013 EREG(ib, VGT_HOS_CNTL, 0); 1014 EREG(ib, VGT_HOS_MAX_TESS_LEVEL, 0); 1015 EREG(ib, VGT_HOS_MIN_TESS_LEVEL, 0); 1016 EREG(ib, VGT_HOS_REUSE_DEPTH, 0); 1017 EREG(ib, VGT_GROUP_PRIM_TYPE, 0); 1018 EREG(ib, VGT_GROUP_FIRST_DECR, 0); 1019 EREG(ib, VGT_GROUP_DECR, 0); 1020 EREG(ib, VGT_GROUP_VECT_0_CNTL, 0); 1021 EREG(ib, VGT_GROUP_VECT_1_CNTL, 0); 1022 EREG(ib, VGT_GROUP_VECT_0_FMT_CNTL, 0); 1023 EREG(ib, VGT_GROUP_VECT_1_FMT_CNTL, 0); 1024 EREG(ib, VGT_PRIMITIVEID_EN, 0); 1025 EREG(ib, VGT_MULTI_PRIM_IB_RESET_EN, 0); 1026 EREG(ib, VGT_STRMOUT_EN, 0); 1027 EREG(ib, VGT_REUSE_OFF, 0); 1028 EREG(ib, VGT_VTX_CNT_EN, 0); 1029 EREG(ib, VGT_STRMOUT_BUFFER_EN, 0); 1030 1031 // clear tex resources - PS 1032 for (i = 0; i < 16; i++) { 1033 tex_res.id = i; 1034 
set_tex_resource(pScrn, ib, &tex_res); 1035 } 1036 1037 // clear tex resources - VS 1038 for (i = 160; i < 164; i++) { 1039 tex_res.id = i; 1040 set_tex_resource(pScrn, ib, &tex_res); 1041 } 1042 1043 // clear tex resources - FS 1044 for (i = 320; i < 335; i++) { 1045 tex_res.id = i; 1046 set_tex_resource(pScrn, ib, &tex_res); 1047 } 1048 1049} 1050 1051 1052/* 1053 * Commands 1054 */ 1055 1056void 1057draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices) 1058{ 1059 uint32_t i, count; 1060 1061 EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1062 PACK3(ib, IT_INDEX_TYPE, 1); 1063 E32(ib, draw_conf->index_type); 1064 PACK3(ib, IT_NUM_INSTANCES, 1); 1065 E32(ib, draw_conf->num_instances); 1066 1067 // calculate num of packets 1068 count = 2; 1069 if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) 1070 count += (draw_conf->num_indices + 1) / 2; 1071 else 1072 count += draw_conf->num_indices; 1073 1074 PACK3(ib, IT_DRAW_INDEX_IMMD, count); 1075 E32(ib, draw_conf->num_indices); 1076 E32(ib, draw_conf->vgt_draw_initiator); 1077 1078 if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) { 1079 for (i = 0; i < draw_conf->num_indices; i += 2) { 1080 if ((i + 1) == draw_conf->num_indices) 1081 E32(ib, indices[i]); 1082 else 1083 E32(ib, (indices[i] | (indices[i + 1] << 16))); 1084 } 1085 } else { 1086 for (i = 0; i < draw_conf->num_indices; i++) 1087 E32(ib, indices[i]); 1088 } 1089} 1090 1091void 1092draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) 1093{ 1094 1095 EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1096 PACK3(ib, IT_INDEX_TYPE, 1); 1097 E32(ib, draw_conf->index_type); 1098 PACK3(ib, IT_NUM_INSTANCES, 1); 1099 E32(ib, draw_conf->num_instances); 1100 PACK3(ib, IT_DRAW_INDEX_AUTO, 2); 1101 E32(ib, draw_conf->num_indices); 1102 E32(ib, draw_conf->vgt_draw_initiator); 1103} 1104