r6xx_accel.c revision de2362d3
1de2362d3Smrg/* 2de2362d3Smrg * Copyright 2008 Advanced Micro Devices, Inc. 3de2362d3Smrg * 4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5de2362d3Smrg * copy of this software and associated documentation files (the "Software"), 6de2362d3Smrg * to deal in the Software without restriction, including without limitation 7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the 9de2362d3Smrg * Software is furnished to do so, subject to the following conditions: 10de2362d3Smrg * 11de2362d3Smrg * The above copyright notice and this permission notice (including the next 12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the 13de2362d3Smrg * Software. 14de2362d3Smrg * 15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21de2362d3Smrg * SOFTWARE. 22de2362d3Smrg * 23de2362d3Smrg * Authors: Alex Deucher <alexander.deucher@amd.com> 24de2362d3Smrg * Matthias Hopf <mhopf@suse.de> 25de2362d3Smrg */ 26de2362d3Smrg#ifdef HAVE_CONFIG_H 27de2362d3Smrg#include "config.h" 28de2362d3Smrg#endif 29de2362d3Smrg 30de2362d3Smrg#include "xf86.h" 31de2362d3Smrg 32de2362d3Smrg#include <errno.h> 33de2362d3Smrg 34de2362d3Smrg#include "radeon.h" 35de2362d3Smrg#include "r600_shader.h" 36de2362d3Smrg#include "radeon_reg.h" 37de2362d3Smrg#include "r600_reg.h" 38de2362d3Smrg#include "r600_state.h" 39de2362d3Smrg 40de2362d3Smrg#include "radeon_vbo.h" 41de2362d3Smrg#include "radeon_exa_shared.h" 42de2362d3Smrg 43de2362d3Smrgstatic const uint32_t R600_ROP[16] = { 44de2362d3Smrg RADEON_ROP3_ZERO, /* GXclear */ 45de2362d3Smrg RADEON_ROP3_DSa, /* Gxand */ 46de2362d3Smrg RADEON_ROP3_SDna, /* GXandReverse */ 47de2362d3Smrg RADEON_ROP3_S, /* GXcopy */ 48de2362d3Smrg RADEON_ROP3_DSna, /* GXandInverted */ 49de2362d3Smrg RADEON_ROP3_D, /* GXnoop */ 50de2362d3Smrg RADEON_ROP3_DSx, /* GXxor */ 51de2362d3Smrg RADEON_ROP3_DSo, /* GXor */ 52de2362d3Smrg RADEON_ROP3_DSon, /* GXnor */ 53de2362d3Smrg RADEON_ROP3_DSxn, /* GXequiv */ 54de2362d3Smrg RADEON_ROP3_Dn, /* GXinvert */ 55de2362d3Smrg RADEON_ROP3_SDno, /* GXorReverse */ 56de2362d3Smrg RADEON_ROP3_Sn, /* GXcopyInverted */ 57de2362d3Smrg RADEON_ROP3_DSno, /* GXorInverted */ 58de2362d3Smrg RADEON_ROP3_DSan, /* GXnand */ 59de2362d3Smrg RADEON_ROP3_ONE, /* GXset */ 60de2362d3Smrg}; 61de2362d3Smrg 62de2362d3Smrg/* we try and batch operations together under KMS - 63de2362d3Smrg but it doesn't work yet without misrendering */ 64de2362d3Smrg#define KMS_MULTI_OP 1 65de2362d3Smrg 66de2362d3Smrg/* Flush the indirect buffer to the kernel for submission to the card */ 67de2362d3Smrgvoid R600CPFlushIndirect(ScrnInfoPtr pScrn) 68de2362d3Smrg{ 69de2362d3Smrg radeon_cs_flush_indirect(pScrn); 70de2362d3Smrg} 71de2362d3Smrg 72de2362d3Smrgvoid R600IBDiscard(ScrnInfoPtr pScrn) 73de2362d3Smrg{ 74de2362d3Smrg radeon_ib_discard(pScrn); 75de2362d3Smrg} 76de2362d3Smrg 77de2362d3Smrgvoid 78de2362d3Smrgr600_wait_3d_idle_clean(ScrnInfoPtr pScrn) 79de2362d3Smrg{ 80de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 81de2362d3Smrg 82de2362d3Smrg //flush caches, don't generate timestamp 83de2362d3Smrg BEGIN_BATCH(5); 84de2362d3Smrg PACK3(IT_EVENT_WRITE, 1); 85de2362d3Smrg E32(CACHE_FLUSH_AND_INV_EVENT); 86de2362d3Smrg // wait for 3D idle clean 87de2362d3Smrg EREG(WAIT_UNTIL, (WAIT_3D_IDLE_bit | 88de2362d3Smrg WAIT_3D_IDLECLEAN_bit)); 89de2362d3Smrg END_BATCH(); 90de2362d3Smrg} 91de2362d3Smrg 92de2362d3Smrgvoid 93de2362d3Smrgr600_wait_3d_idle(ScrnInfoPtr pScrn) 94de2362d3Smrg{ 95de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 96de2362d3Smrg 97de2362d3Smrg BEGIN_BATCH(3); 98de2362d3Smrg EREG(WAIT_UNTIL, WAIT_3D_IDLE_bit); 99de2362d3Smrg END_BATCH(); 100de2362d3Smrg} 101de2362d3Smrg 102de2362d3Smrgvoid 103de2362d3Smrgr600_start_3d(ScrnInfoPtr pScrn) 104de2362d3Smrg{ 105de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 106de2362d3Smrg 107de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) { 108de2362d3Smrg BEGIN_BATCH(5); 109de2362d3Smrg PACK3(IT_START_3D_CMDBUF, 1); 110de2362d3Smrg E32(0); 111de2362d3Smrg } else 112de2362d3Smrg BEGIN_BATCH(3); 113de2362d3Smrg 114de2362d3Smrg PACK3(IT_CONTEXT_CONTROL, 2); 115de2362d3Smrg E32(0x80000000); 116de2362d3Smrg E32(0x80000000); 117de2362d3Smrg END_BATCH(); 118de2362d3Smrg 119de2362d3Smrg} 120de2362d3Smrg 121de2362d3Smrg/* 122de2362d3Smrg * Setup of functional groups 123de2362d3Smrg */ 124de2362d3Smrg 125de2362d3Smrg// asic stack/thread/gpr limits - need to query the drm 126de2362d3Smrgstatic void 127de2362d3Smrgr600_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf) 128de2362d3Smrg{ 129de2362d3Smrg uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 130de2362d3Smrg uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; 131de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 132de2362d3Smrg 133de2362d3Smrg if ((info->ChipFamily == CHIP_FAMILY_RV610) || 134de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV620) || 135de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS780) || 136de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS880) || 137de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV710)) 138de2362d3Smrg sq_config = 0; // no VC 139de2362d3Smrg else 140de2362d3Smrg sq_config = VC_ENABLE_bit; 141de2362d3Smrg 142de2362d3Smrg sq_config |= (DX9_CONSTS_bit | 143de2362d3Smrg ALU_INST_PREFER_VECTOR_bit | 144de2362d3Smrg (sq_conf->ps_prio << PS_PRIO_shift) | 145de2362d3Smrg (sq_conf->vs_prio << VS_PRIO_shift) | 146de2362d3Smrg (sq_conf->gs_prio << GS_PRIO_shift) | 147de2362d3Smrg (sq_conf->es_prio << ES_PRIO_shift)); 148de2362d3Smrg 149de2362d3Smrg sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 150de2362d3Smrg (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 151de2362d3Smrg (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 152de2362d3Smrg sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 153de2362d3Smrg (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 154de2362d3Smrg 155de2362d3Smrg sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 156de2362d3Smrg (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 157de2362d3Smrg (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 158de2362d3Smrg (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 159de2362d3Smrg 160de2362d3Smrg sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 161de2362d3Smrg (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 162de2362d3Smrg 163de2362d3Smrg sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 164de2362d3Smrg (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 165de2362d3Smrg 166de2362d3Smrg BEGIN_BATCH(8); 167de2362d3Smrg PACK0(SQ_CONFIG, 6); 168de2362d3Smrg E32(sq_config); 169de2362d3Smrg E32(sq_gpr_resource_mgmt_1); 170de2362d3Smrg E32(sq_gpr_resource_mgmt_2); 171de2362d3Smrg E32(sq_thread_resource_mgmt); 172de2362d3Smrg E32(sq_stack_resource_mgmt_1); 173de2362d3Smrg E32(sq_stack_resource_mgmt_2); 174de2362d3Smrg END_BATCH(); 175de2362d3Smrg} 176de2362d3Smrg 177de2362d3Smrgvoid 178de2362d3Smrgr600_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain) 179de2362d3Smrg{ 180de2362d3Smrg uint32_t cb_color_info, cb_color_control; 181de2362d3Smrg unsigned pitch, slice, h, array_mode; 182de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 183de2362d3Smrg 184de2362d3Smrg 185de2362d3Smrg if (cb_conf->surface) { 186de2362d3Smrg switch (cb_conf->surface->level[0].mode) { 187de2362d3Smrg case RADEON_SURF_MODE_1D: 188de2362d3Smrg array_mode = 2; 189de2362d3Smrg break; 190de2362d3Smrg case RADEON_SURF_MODE_2D: 191de2362d3Smrg array_mode = 4; 192de2362d3Smrg break; 193de2362d3Smrg default: 194de2362d3Smrg array_mode = 0; 195de2362d3Smrg break; 196de2362d3Smrg } 197de2362d3Smrg pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1; 198de2362d3Smrg slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1; 199de2362d3Smrg } else 200de2362d3Smrg { 201de2362d3Smrg array_mode = cb_conf->array_mode; 202de2362d3Smrg pitch = (cb_conf->w / 8) - 1; 203de2362d3Smrg h = RADEON_ALIGN(cb_conf->h, 8); 204de2362d3Smrg slice = ((cb_conf->w * h) / 64) - 1; 205de2362d3Smrg } 206de2362d3Smrg 207de2362d3Smrg cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 208de2362d3Smrg (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 209de2362d3Smrg (array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 210de2362d3Smrg (cb_conf->number_type << NUMBER_TYPE_shift) | 211de2362d3Smrg (cb_conf->comp_swap << COMP_SWAP_shift) | 212de2362d3Smrg (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); 213de2362d3Smrg if (cb_conf->read_size) 214de2362d3Smrg cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; 215de2362d3Smrg if (cb_conf->blend_clamp) 216de2362d3Smrg cb_color_info |= BLEND_CLAMP_bit; 217de2362d3Smrg if (cb_conf->clear_color) 218de2362d3Smrg cb_color_info |= CLEAR_COLOR_bit; 219de2362d3Smrg if (cb_conf->blend_bypass) 220de2362d3Smrg cb_color_info |= BLEND_BYPASS_bit; 221de2362d3Smrg if (cb_conf->blend_float32) 222de2362d3Smrg cb_color_info |= BLEND_FLOAT32_bit; 223de2362d3Smrg if (cb_conf->simple_float) 224de2362d3Smrg cb_color_info |= SIMPLE_FLOAT_bit; 225de2362d3Smrg if (cb_conf->round_mode) 226de2362d3Smrg cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 227de2362d3Smrg if (cb_conf->tile_compact) 228de2362d3Smrg cb_color_info |= TILE_COMPACT_bit; 229de2362d3Smrg if (cb_conf->source_format) 230de2362d3Smrg cb_color_info |= SOURCE_FORMAT_bit; 231de2362d3Smrg 232de2362d3Smrg BEGIN_BATCH(3 + 2); 233de2362d3Smrg EREG((CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); 234de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 235de2362d3Smrg END_BATCH(); 236de2362d3Smrg 237de2362d3Smrg // rv6xx workaround 238de2362d3Smrg if ((info->ChipFamily > CHIP_FAMILY_R600) && 239de2362d3Smrg (info->ChipFamily < CHIP_FAMILY_RV770)) { 240de2362d3Smrg BEGIN_BATCH(2); 241de2362d3Smrg PACK3(IT_SURFACE_BASE_UPDATE, 1); 242de2362d3Smrg E32((2 << cb_conf->id)); 243de2362d3Smrg END_BATCH(); 244de2362d3Smrg } 245de2362d3Smrg /* Set CMASK & TILE buffer to the offset of color buffer as 246de2362d3Smrg * we don't use those this shouldn't cause any issue and we 247de2362d3Smrg * then have a valid cmd stream 248de2362d3Smrg */ 249de2362d3Smrg BEGIN_BATCH(3 + 2); 250de2362d3Smrg EREG((CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 251de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 252de2362d3Smrg END_BATCH(); 253de2362d3Smrg BEGIN_BATCH(3 + 2); 254de2362d3Smrg EREG((CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 255de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 256de2362d3Smrg END_BATCH(); 257de2362d3Smrg BEGIN_BATCH(9); 258de2362d3Smrg // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib 259de2362d3Smrg EREG((CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | 260de2362d3Smrg (slice << SLICE_TILE_MAX_shift))); 261de2362d3Smrg EREG((CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | 262de2362d3Smrg (0 << SLICE_MAX_shift))); 263de2362d3Smrg EREG((CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | 264de2362d3Smrg (0 << FMASK_TILE_MAX_shift))); 265de2362d3Smrg END_BATCH(); 266de2362d3Smrg 267de2362d3Smrg BEGIN_BATCH(3 + 2); 268de2362d3Smrg EREG((CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); 269de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 270de2362d3Smrg END_BATCH(); 271de2362d3Smrg 272de2362d3Smrg BEGIN_BATCH(9); 273de2362d3Smrg EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift)); 274de2362d3Smrg cb_color_control = R600_ROP[cb_conf->rop] | 275de2362d3Smrg (cb_conf->blend_enable << TARGET_BLEND_ENABLE_shift); 276de2362d3Smrg if (info->ChipFamily == CHIP_FAMILY_R600) { 277de2362d3Smrg /* no per-MRT blend on R600 */ 278de2362d3Smrg EREG(CB_COLOR_CONTROL, cb_color_control); 279de2362d3Smrg EREG(CB_BLEND_CONTROL, cb_conf->blendcntl); 280de2362d3Smrg } else { 281de2362d3Smrg if (cb_conf->blend_enable) 282de2362d3Smrg cb_color_control |= PER_MRT_BLEND_bit; 283de2362d3Smrg EREG(CB_COLOR_CONTROL, cb_color_control); 284de2362d3Smrg EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); 285de2362d3Smrg } 286de2362d3Smrg END_BATCH(); 287de2362d3Smrg} 288de2362d3Smrg 289de2362d3Smrgstatic void 290de2362d3Smrgr600_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type, 291de2362d3Smrg uint32_t size, uint64_t mc_addr, 292de2362d3Smrg struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) 293de2362d3Smrg{ 294de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 295de2362d3Smrg uint32_t cp_coher_size; 296de2362d3Smrg if (size == 0xffffffff) 297de2362d3Smrg cp_coher_size = 0xffffffff; 298de2362d3Smrg else 299de2362d3Smrg cp_coher_size = ((size + 255) >> 8); 300de2362d3Smrg 301de2362d3Smrg BEGIN_BATCH(5 + 2); 302de2362d3Smrg PACK3(IT_SURFACE_SYNC, 4); 303de2362d3Smrg E32(sync_type); 304de2362d3Smrg E32(cp_coher_size); 305de2362d3Smrg E32((mc_addr >> 8)); 306de2362d3Smrg E32(10); /* poll interval */ 307de2362d3Smrg RELOC_BATCH(bo, rdomains, wdomain); 308de2362d3Smrg END_BATCH(); 309de2362d3Smrg} 310de2362d3Smrg 311de2362d3Smrg/* inserts a wait for vline in the command stream */ 312de2362d3Smrgvoid 313de2362d3Smrgr600_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, 314de2362d3Smrg xf86CrtcPtr crtc, int start, int stop) 315de2362d3Smrg{ 316de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 317de2362d3Smrg drmmode_crtc_private_ptr drmmode_crtc; 318de2362d3Smrg 319de2362d3Smrg if (!crtc) 320de2362d3Smrg return; 321de2362d3Smrg 322de2362d3Smrg if (!crtc->enabled) 323de2362d3Smrg return; 324de2362d3Smrg 325de2362d3Smrg if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) 326de2362d3Smrg return; 327de2362d3Smrg 328de2362d3Smrg start = max(start, crtc->y); 329de2362d3Smrg stop = min(stop, crtc->y + crtc->mode.VDisplay); 330de2362d3Smrg 331de2362d3Smrg if (start >= stop) 332de2362d3Smrg return; 333de2362d3Smrg 334de2362d3Smrg drmmode_crtc = crtc->driver_private; 335de2362d3Smrg 336de2362d3Smrg BEGIN_BATCH(11); 337de2362d3Smrg /* set the VLINE range */ 338de2362d3Smrg EREG(AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */ 339de2362d3Smrg (start << AVIVO_D1MODE_VLINE_START_SHIFT) | 340de2362d3Smrg (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); 341de2362d3Smrg 342de2362d3Smrg /* tell the CP to poll the VLINE state register */ 343de2362d3Smrg PACK3(IT_WAIT_REG_MEM, 6); 344de2362d3Smrg E32(IT_WAIT_REG | IT_WAIT_EQ); 345de2362d3Smrg E32(IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS)); 346de2362d3Smrg E32(0); 347de2362d3Smrg E32(0); // Ref value 348de2362d3Smrg E32(AVIVO_D1MODE_VLINE_STAT); // Mask 349de2362d3Smrg E32(10); // Wait interval 350de2362d3Smrg /* add crtc reloc */ 351de2362d3Smrg PACK3(IT_NOP, 1); 352de2362d3Smrg E32(drmmode_crtc->mode_crtc->crtc_id); 353de2362d3Smrg END_BATCH(); 354de2362d3Smrg} 355de2362d3Smrg 356de2362d3Smrgvoid 357de2362d3Smrgr600_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp) 358de2362d3Smrg{ 359de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 360de2362d3Smrg 361de2362d3Smrg BEGIN_BATCH(8); 362de2362d3Smrg /* Interpolator setup */ 363de2362d3Smrg EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift)); 364de2362d3Smrg PACK0(SPI_PS_IN_CONTROL_0, 3); 365de2362d3Smrg E32((num_interp << NUM_INTERP_shift)); 366de2362d3Smrg E32(0); 367de2362d3Smrg E32(0); 368de2362d3Smrg END_BATCH(); 369de2362d3Smrg} 370de2362d3Smrg 371de2362d3Smrgvoid 372de2362d3Smrgr600_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain) 373de2362d3Smrg{ 374de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 375de2362d3Smrg uint32_t sq_pgm_resources; 376de2362d3Smrg 377de2362d3Smrg sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 378de2362d3Smrg (fs_conf->stack_size << STACK_SIZE_shift)); 379de2362d3Smrg 380de2362d3Smrg if (fs_conf->dx10_clamp) 381de2362d3Smrg sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; 382de2362d3Smrg 383de2362d3Smrg BEGIN_BATCH(3 + 2); 384de2362d3Smrg EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 385de2362d3Smrg RELOC_BATCH(fs_conf->bo, domain, 0); 386de2362d3Smrg END_BATCH(); 387de2362d3Smrg 388de2362d3Smrg BEGIN_BATCH(6); 389de2362d3Smrg EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources); 390de2362d3Smrg EREG(SQ_PGM_CF_OFFSET_FS, 0); 391de2362d3Smrg END_BATCH(); 392de2362d3Smrg} 393de2362d3Smrg 394de2362d3Smrgvoid 395de2362d3Smrgr600_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain) 396de2362d3Smrg{ 397de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 398de2362d3Smrg uint32_t sq_pgm_resources; 399de2362d3Smrg 400de2362d3Smrg sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 401de2362d3Smrg (vs_conf->stack_size << STACK_SIZE_shift)); 402de2362d3Smrg 403de2362d3Smrg if (vs_conf->dx10_clamp) 404de2362d3Smrg sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; 405de2362d3Smrg if (vs_conf->fetch_cache_lines) 406de2362d3Smrg sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 407de2362d3Smrg if (vs_conf->uncached_first_inst) 408de2362d3Smrg sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 409de2362d3Smrg 410de2362d3Smrg /* flush SQ cache */ 411de2362d3Smrg r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 412de2362d3Smrg vs_conf->shader_size, vs_conf->shader_addr, 413de2362d3Smrg vs_conf->bo, domain, 0); 414de2362d3Smrg 415de2362d3Smrg BEGIN_BATCH(3 + 2); 416de2362d3Smrg EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 417de2362d3Smrg RELOC_BATCH(vs_conf->bo, domain, 0); 418de2362d3Smrg END_BATCH(); 419de2362d3Smrg 420de2362d3Smrg BEGIN_BATCH(6); 421de2362d3Smrg EREG(SQ_PGM_RESOURCES_VS, sq_pgm_resources); 422de2362d3Smrg EREG(SQ_PGM_CF_OFFSET_VS, 0); 423de2362d3Smrg END_BATCH(); 424de2362d3Smrg} 425de2362d3Smrg 426de2362d3Smrgvoid 427de2362d3Smrgr600_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain) 428de2362d3Smrg{ 429de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 430de2362d3Smrg uint32_t sq_pgm_resources; 431de2362d3Smrg 432de2362d3Smrg sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 433de2362d3Smrg (ps_conf->stack_size << STACK_SIZE_shift)); 434de2362d3Smrg 435de2362d3Smrg if (ps_conf->dx10_clamp) 436de2362d3Smrg sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; 437de2362d3Smrg if (ps_conf->fetch_cache_lines) 438de2362d3Smrg sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 439de2362d3Smrg if (ps_conf->uncached_first_inst) 440de2362d3Smrg sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 441de2362d3Smrg if (ps_conf->clamp_consts) 442de2362d3Smrg sq_pgm_resources |= CLAMP_CONSTS_bit; 443de2362d3Smrg 444de2362d3Smrg /* flush SQ cache */ 445de2362d3Smrg r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 446de2362d3Smrg ps_conf->shader_size, ps_conf->shader_addr, 447de2362d3Smrg ps_conf->bo, domain, 0); 448de2362d3Smrg 449de2362d3Smrg BEGIN_BATCH(3 + 2); 450de2362d3Smrg EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 451de2362d3Smrg RELOC_BATCH(ps_conf->bo, domain, 0); 452de2362d3Smrg END_BATCH(); 453de2362d3Smrg 454de2362d3Smrg BEGIN_BATCH(9); 455de2362d3Smrg EREG(SQ_PGM_RESOURCES_PS, sq_pgm_resources); 456de2362d3Smrg EREG(SQ_PGM_EXPORTS_PS, ps_conf->export_mode); 457de2362d3Smrg EREG(SQ_PGM_CF_OFFSET_PS, 0); 458de2362d3Smrg END_BATCH(); 459de2362d3Smrg} 460de2362d3Smrg 461de2362d3Smrgvoid 462de2362d3Smrgr600_set_alu_consts(ScrnInfoPtr pScrn, int offset, int count, float *const_buf) 463de2362d3Smrg{ 464de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 465de2362d3Smrg int i; 466de2362d3Smrg const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); 467de2362d3Smrg 468de2362d3Smrg BEGIN_BATCH(2 + countreg); 469de2362d3Smrg PACK0(SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); 470de2362d3Smrg for (i = 0; i < countreg; i++) 471de2362d3Smrg EFLOAT(const_buf[i]); 472de2362d3Smrg END_BATCH(); 473de2362d3Smrg} 474de2362d3Smrg 475de2362d3Smrgvoid 476de2362d3Smrgr600_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val) 477de2362d3Smrg{ 478de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 479de2362d3Smrg /* bool register order is: ps, vs, gs; one register each 480de2362d3Smrg * 1 bits per bool; 32 bools each for ps, vs, gs. 481de2362d3Smrg */ 482de2362d3Smrg BEGIN_BATCH(3); 483de2362d3Smrg EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 484de2362d3Smrg END_BATCH(); 485de2362d3Smrg} 486de2362d3Smrg 487de2362d3Smrgstatic void 488de2362d3Smrgr600_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain) 489de2362d3Smrg{ 490de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 491de2362d3Smrg struct radeon_accel_state *accel_state = info->accel_state; 492de2362d3Smrg uint32_t sq_vtx_constant_word2; 493de2362d3Smrg 494de2362d3Smrg sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 495de2362d3Smrg ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 496de2362d3Smrg (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 497de2362d3Smrg (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 498de2362d3Smrg (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 499de2362d3Smrg if (res->clamp_x) 500de2362d3Smrg sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 501de2362d3Smrg 502de2362d3Smrg if (res->format_comp_all) 503de2362d3Smrg sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 504de2362d3Smrg 505de2362d3Smrg if (res->srf_mode_all) 506de2362d3Smrg sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 507de2362d3Smrg 508de2362d3Smrg /* flush vertex cache */ 509de2362d3Smrg if ((info->ChipFamily == CHIP_FAMILY_RV610) || 510de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV620) || 511de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS780) || 512de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS880) || 513de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV710)) 514de2362d3Smrg r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 515de2362d3Smrg accel_state->vbo.vb_offset, 0, 516de2362d3Smrg res->bo, 517de2362d3Smrg domain, 0); 518de2362d3Smrg else 519de2362d3Smrg r600_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit, 520de2362d3Smrg accel_state->vbo.vb_offset, 0, 521de2362d3Smrg res->bo, 522de2362d3Smrg domain, 0); 523de2362d3Smrg 524de2362d3Smrg BEGIN_BATCH(9 + 2); 525de2362d3Smrg PACK0(SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); 526de2362d3Smrg E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 527de2362d3Smrg E32((res->vtx_num_entries << 2) - 1); // 1: SIZE 528de2362d3Smrg E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 529de2362d3Smrg E32(res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? 530de2362d3Smrg E32(0); // 4: n/a 531de2362d3Smrg E32(0); // 5: n/a 532de2362d3Smrg E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE 533de2362d3Smrg RELOC_BATCH(res->bo, domain, 0); 534de2362d3Smrg END_BATCH(); 535de2362d3Smrg} 536de2362d3Smrg 537de2362d3Smrgvoid 538de2362d3Smrgr600_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain) 539de2362d3Smrg{ 540de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 541de2362d3Smrg uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 542de2362d3Smrg uint32_t sq_tex_resource_word5, sq_tex_resource_word6; 543de2362d3Smrg uint32_t array_mode, pitch; 544de2362d3Smrg 545de2362d3Smrg if (tex_res->surface) { 546de2362d3Smrg switch (tex_res->surface->level[0].mode) { 547de2362d3Smrg case RADEON_SURF_MODE_1D: 548de2362d3Smrg array_mode = 2; 549de2362d3Smrg break; 550de2362d3Smrg case RADEON_SURF_MODE_2D: 551de2362d3Smrg array_mode = 4; 552de2362d3Smrg break; 553de2362d3Smrg default: 554de2362d3Smrg array_mode = 0; 555de2362d3Smrg break; 556de2362d3Smrg } 557de2362d3Smrg pitch = tex_res->surface->level[0].nblk_x >> 3; 558de2362d3Smrg } else 559de2362d3Smrg { 560de2362d3Smrg array_mode = tex_res->tile_mode; 561de2362d3Smrg pitch = (tex_res->pitch + 7) >> 3; 562de2362d3Smrg } 563de2362d3Smrg 564de2362d3Smrg sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | 565de2362d3Smrg (array_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); 566de2362d3Smrg 567de2362d3Smrg if (tex_res->w) 568de2362d3Smrg sq_tex_resource_word0 |= (((pitch - 1) << PITCH_shift) | 569de2362d3Smrg ((tex_res->w - 1) << TEX_WIDTH_shift)); 570de2362d3Smrg 571de2362d3Smrg if (tex_res->tile_type) 572de2362d3Smrg sq_tex_resource_word0 |= TILE_TYPE_bit; 573de2362d3Smrg 574de2362d3Smrg sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); 575de2362d3Smrg 576de2362d3Smrg if (tex_res->h) 577de2362d3Smrg sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 578de2362d3Smrg if (tex_res->depth) 579de2362d3Smrg sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 580de2362d3Smrg 581de2362d3Smrg sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 582de2362d3Smrg (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 583de2362d3Smrg (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 584de2362d3Smrg (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 585de2362d3Smrg (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 586de2362d3Smrg (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 587de2362d3Smrg (tex_res->request_size << REQUEST_SIZE_shift) | 588de2362d3Smrg (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 589de2362d3Smrg (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 590de2362d3Smrg (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 591de2362d3Smrg (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 592de2362d3Smrg (tex_res->base_level << BASE_LEVEL_shift)); 593de2362d3Smrg 594de2362d3Smrg if (tex_res->srf_mode_all) 595de2362d3Smrg sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 596de2362d3Smrg if (tex_res->force_degamma) 597de2362d3Smrg sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 598de2362d3Smrg 599de2362d3Smrg sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 600de2362d3Smrg (tex_res->base_array << BASE_ARRAY_shift) | 601de2362d3Smrg (tex_res->last_array << LAST_ARRAY_shift)); 602de2362d3Smrg 603de2362d3Smrg sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | 604de2362d3Smrg (tex_res->perf_modulation << PERF_MODULATION_shift) | 605de2362d3Smrg (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); 606de2362d3Smrg 607de2362d3Smrg if (tex_res->interlaced) 608de2362d3Smrg sq_tex_resource_word6 |= INTERLACED_bit; 609de2362d3Smrg 610de2362d3Smrg /* flush texture cache */ 611de2362d3Smrg r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 612de2362d3Smrg tex_res->size, tex_res->base, 613de2362d3Smrg tex_res->bo, domain, 0); 614de2362d3Smrg 615de2362d3Smrg BEGIN_BATCH(9 + 4); 616de2362d3Smrg PACK0(SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); 617de2362d3Smrg E32(sq_tex_resource_word0); 618de2362d3Smrg E32(sq_tex_resource_word1); 619de2362d3Smrg E32(((tex_res->base) >> 8)); 620de2362d3Smrg E32(((tex_res->mip_base) >> 8)); 621de2362d3Smrg E32(sq_tex_resource_word4); 622de2362d3Smrg E32(sq_tex_resource_word5); 623de2362d3Smrg E32(sq_tex_resource_word6); 624de2362d3Smrg RELOC_BATCH(tex_res->bo, domain, 0); 625de2362d3Smrg RELOC_BATCH(tex_res->mip_bo, domain, 0); 626de2362d3Smrg END_BATCH(); 627de2362d3Smrg} 628de2362d3Smrg 629de2362d3Smrgvoid 630de2362d3Smrgr600_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s) 631de2362d3Smrg{ 632de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 633de2362d3Smrg uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 634de2362d3Smrg 635de2362d3Smrg sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 636de2362d3Smrg (s->clamp_y << CLAMP_Y_shift) | 637de2362d3Smrg (s->clamp_z << CLAMP_Z_shift) | 638de2362d3Smrg (s->xy_mag_filter << XY_MAG_FILTER_shift) | 639de2362d3Smrg (s->xy_min_filter << XY_MIN_FILTER_shift) | 640de2362d3Smrg (s->z_filter << Z_FILTER_shift) | 641de2362d3Smrg (s->mip_filter << MIP_FILTER_shift) | 642de2362d3Smrg (s->border_color << BORDER_COLOR_TYPE_shift) | 643de2362d3Smrg (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 644de2362d3Smrg (s->chroma_key << CHROMA_KEY_shift)); 645de2362d3Smrg if (s->point_sampling_clamp) 646de2362d3Smrg sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; 647de2362d3Smrg if (s->tex_array_override) 648de2362d3Smrg sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; 649de2362d3Smrg if (s->lod_uses_minor_axis) 650de2362d3Smrg sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; 651de2362d3Smrg 652de2362d3Smrg sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | 653de2362d3Smrg (s->max_lod << MAX_LOD_shift) | 654de2362d3Smrg (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); 655de2362d3Smrg 656de2362d3Smrg sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | 657de2362d3Smrg (s->perf_mip << PERF_MIP_shift) | 658de2362d3Smrg (s->perf_z << PERF_Z_shift)); 659de2362d3Smrg if (s->mc_coord_truncate) 660de2362d3Smrg sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 661de2362d3Smrg if (s->force_degamma) 662de2362d3Smrg sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 663de2362d3Smrg if (s->high_precision_filter) 664de2362d3Smrg sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; 665de2362d3Smrg if (s->fetch_4) 666de2362d3Smrg sq_tex_sampler_word2 |= FETCH_4_bit; 667de2362d3Smrg if (s->sample_is_pcf) 668de2362d3Smrg sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; 669de2362d3Smrg if (s->type) 670de2362d3Smrg sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 671de2362d3Smrg 672de2362d3Smrg BEGIN_BATCH(5); 673de2362d3Smrg PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); 674de2362d3Smrg E32(sq_tex_sampler_word0); 675de2362d3Smrg E32(sq_tex_sampler_word1); 676de2362d3Smrg E32(sq_tex_sampler_word2); 677de2362d3Smrg END_BATCH(); 678de2362d3Smrg} 679de2362d3Smrg 680de2362d3Smrg//XXX deal with clip offsets in clip setup 681de2362d3Smrgvoid 682de2362d3Smrgr600_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 683de2362d3Smrg{ 684de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 685de2362d3Smrg 686de2362d3Smrg BEGIN_BATCH(4); 687de2362d3Smrg PACK0(PA_SC_SCREEN_SCISSOR_TL, 2); 688de2362d3Smrg E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 689de2362d3Smrg (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 690de2362d3Smrg E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | 691de2362d3Smrg (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 692de2362d3Smrg END_BATCH(); 693de2362d3Smrg} 694de2362d3Smrg 695de2362d3Smrgvoid 696de2362d3Smrgr600_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 697de2362d3Smrg{ 698de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 699de2362d3Smrg 700de2362d3Smrg BEGIN_BATCH(4); 701de2362d3Smrg PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); 702de2362d3Smrg E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 703de2362d3Smrg (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 704de2362d3Smrg WINDOW_OFFSET_DISABLE_bit)); 705de2362d3Smrg E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 706de2362d3Smrg (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 707de2362d3Smrg END_BATCH(); 708de2362d3Smrg} 709de2362d3Smrg 710de2362d3Smrgvoid 711de2362d3Smrgr600_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 712de2362d3Smrg{ 713de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 714de2362d3Smrg 715de2362d3Smrg BEGIN_BATCH(4); 716de2362d3Smrg PACK0(PA_SC_GENERIC_SCISSOR_TL, 2); 717de2362d3Smrg E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 718de2362d3Smrg (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 719de2362d3Smrg WINDOW_OFFSET_DISABLE_bit)); 720de2362d3Smrg E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 721de2362d3Smrg (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 722de2362d3Smrg END_BATCH(); 723de2362d3Smrg} 724de2362d3Smrg 725de2362d3Smrgvoid 726de2362d3Smrgr600_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 727de2362d3Smrg{ 728de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 729de2362d3Smrg 730de2362d3Smrg BEGIN_BATCH(4); 731de2362d3Smrg PACK0(PA_SC_WINDOW_SCISSOR_TL, 2); 732de2362d3Smrg E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 733de2362d3Smrg (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 734de2362d3Smrg WINDOW_OFFSET_DISABLE_bit)); 735de2362d3Smrg E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 736de2362d3Smrg (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 737de2362d3Smrg END_BATCH(); 738de2362d3Smrg} 739de2362d3Smrg 740de2362d3Smrgvoid 741de2362d3Smrgr600_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 742de2362d3Smrg{ 743de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 744de2362d3Smrg 745de2362d3Smrg BEGIN_BATCH(4); 746de2362d3Smrg PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2); 747de2362d3Smrg E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 748de2362d3Smrg (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 749de2362d3Smrg E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | 750de2362d3Smrg (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 751de2362d3Smrg END_BATCH(); 752de2362d3Smrg} 753de2362d3Smrg 754de2362d3Smrg/* 755de2362d3Smrg * Setup of default state 756de2362d3Smrg */ 757de2362d3Smrg 758de2362d3Smrgvoid 759de2362d3Smrgr600_set_default_state(ScrnInfoPtr pScrn) 760de2362d3Smrg{ 761de2362d3Smrg tex_resource_t tex_res; 762de2362d3Smrg shader_config_t fs_conf; 763de2362d3Smrg sq_config_t sq_conf; 764de2362d3Smrg int i; 765de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 766de2362d3Smrg struct radeon_accel_state *accel_state = info->accel_state; 767de2362d3Smrg 768de2362d3Smrg if (accel_state->XInited3D) 769de2362d3Smrg return; 770de2362d3Smrg 771de2362d3Smrg memset(&tex_res, 0, sizeof(tex_resource_t)); 772de2362d3Smrg memset(&fs_conf, 0, sizeof(shader_config_t)); 773de2362d3Smrg 774de2362d3Smrg accel_state->XInited3D = TRUE; 775de2362d3Smrg 776de2362d3Smrg r600_start_3d(pScrn); 777de2362d3Smrg 778de2362d3Smrg // SQ 779de2362d3Smrg sq_conf.ps_prio = 0; 780de2362d3Smrg sq_conf.vs_prio = 1; 781de2362d3Smrg sq_conf.gs_prio = 2; 782de2362d3Smrg sq_conf.es_prio = 3; 783de2362d3Smrg // need to set stack/thread/gpr limits based on the asic 784de2362d3Smrg // for now just set them low enough so any card will work 785de2362d3Smrg // see r600_cp.c in the drm 786de2362d3Smrg switch (info->ChipFamily) { 787de2362d3Smrg case CHIP_FAMILY_R600: 788de2362d3Smrg sq_conf.num_ps_gprs = 192; 789de2362d3Smrg sq_conf.num_vs_gprs = 56; 790de2362d3Smrg sq_conf.num_temp_gprs = 4; 791de2362d3Smrg sq_conf.num_gs_gprs = 0; 792de2362d3Smrg sq_conf.num_es_gprs = 0; 793de2362d3Smrg sq_conf.num_ps_threads = 136; 794de2362d3Smrg sq_conf.num_vs_threads = 48; 795de2362d3Smrg sq_conf.num_gs_threads = 4; 796de2362d3Smrg sq_conf.num_es_threads = 4; 797de2362d3Smrg sq_conf.num_ps_stack_entries = 128; 798de2362d3Smrg sq_conf.num_vs_stack_entries = 128; 799de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 800de2362d3Smrg sq_conf.num_es_stack_entries = 0; 801de2362d3Smrg break; 802de2362d3Smrg case CHIP_FAMILY_RV630: 803de2362d3Smrg case CHIP_FAMILY_RV635: 804de2362d3Smrg sq_conf.num_ps_gprs = 84; 805de2362d3Smrg sq_conf.num_vs_gprs = 36; 806de2362d3Smrg sq_conf.num_temp_gprs = 4; 807de2362d3Smrg sq_conf.num_gs_gprs = 0; 808de2362d3Smrg sq_conf.num_es_gprs = 0; 809de2362d3Smrg sq_conf.num_ps_threads = 144; 810de2362d3Smrg sq_conf.num_vs_threads = 40; 811de2362d3Smrg sq_conf.num_gs_threads = 4; 812de2362d3Smrg sq_conf.num_es_threads = 4; 813de2362d3Smrg sq_conf.num_ps_stack_entries = 40; 814de2362d3Smrg sq_conf.num_vs_stack_entries = 40; 815de2362d3Smrg sq_conf.num_gs_stack_entries = 32; 816de2362d3Smrg sq_conf.num_es_stack_entries = 16; 817de2362d3Smrg break; 818de2362d3Smrg case CHIP_FAMILY_RV610: 819de2362d3Smrg case CHIP_FAMILY_RV620: 820de2362d3Smrg case CHIP_FAMILY_RS780: 821de2362d3Smrg case CHIP_FAMILY_RS880: 822de2362d3Smrg default: 823de2362d3Smrg sq_conf.num_ps_gprs = 84; 824de2362d3Smrg sq_conf.num_vs_gprs = 36; 825de2362d3Smrg sq_conf.num_temp_gprs = 4; 826de2362d3Smrg sq_conf.num_gs_gprs = 0; 827de2362d3Smrg sq_conf.num_es_gprs = 0; 828de2362d3Smrg sq_conf.num_ps_threads = 136; 829de2362d3Smrg sq_conf.num_vs_threads = 48; 830de2362d3Smrg sq_conf.num_gs_threads = 4; 831de2362d3Smrg sq_conf.num_es_threads = 4; 832de2362d3Smrg sq_conf.num_ps_stack_entries = 40; 833de2362d3Smrg sq_conf.num_vs_stack_entries = 40; 834de2362d3Smrg sq_conf.num_gs_stack_entries = 32; 835de2362d3Smrg sq_conf.num_es_stack_entries = 16; 836de2362d3Smrg break; 837de2362d3Smrg case CHIP_FAMILY_RV670: 838de2362d3Smrg sq_conf.num_ps_gprs = 144; 839de2362d3Smrg sq_conf.num_vs_gprs = 40; 840de2362d3Smrg sq_conf.num_temp_gprs = 4; 841de2362d3Smrg sq_conf.num_gs_gprs = 0; 842de2362d3Smrg sq_conf.num_es_gprs = 0; 843de2362d3Smrg sq_conf.num_ps_threads = 136; 844de2362d3Smrg sq_conf.num_vs_threads = 48; 845de2362d3Smrg sq_conf.num_gs_threads = 4; 846de2362d3Smrg sq_conf.num_es_threads = 4; 847de2362d3Smrg sq_conf.num_ps_stack_entries = 40; 848de2362d3Smrg sq_conf.num_vs_stack_entries = 40; 849de2362d3Smrg sq_conf.num_gs_stack_entries = 32; 850de2362d3Smrg sq_conf.num_es_stack_entries = 16; 851de2362d3Smrg break; 852de2362d3Smrg case CHIP_FAMILY_RV770: 853de2362d3Smrg sq_conf.num_ps_gprs = 192; 854de2362d3Smrg sq_conf.num_vs_gprs = 56; 855de2362d3Smrg sq_conf.num_temp_gprs = 4; 856de2362d3Smrg sq_conf.num_gs_gprs = 0; 857de2362d3Smrg sq_conf.num_es_gprs = 0; 858de2362d3Smrg sq_conf.num_ps_threads = 188; 859de2362d3Smrg sq_conf.num_vs_threads = 60; 860de2362d3Smrg sq_conf.num_gs_threads = 0; 861de2362d3Smrg sq_conf.num_es_threads = 0; 862de2362d3Smrg sq_conf.num_ps_stack_entries = 256; 863de2362d3Smrg sq_conf.num_vs_stack_entries = 256; 864de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 865de2362d3Smrg sq_conf.num_es_stack_entries = 0; 866de2362d3Smrg break; 867de2362d3Smrg case CHIP_FAMILY_RV730: 868de2362d3Smrg case CHIP_FAMILY_RV740: 869de2362d3Smrg sq_conf.num_ps_gprs = 84; 870de2362d3Smrg sq_conf.num_vs_gprs = 36; 871de2362d3Smrg sq_conf.num_temp_gprs = 4; 872de2362d3Smrg sq_conf.num_gs_gprs = 0; 873de2362d3Smrg sq_conf.num_es_gprs = 0; 874de2362d3Smrg sq_conf.num_ps_threads = 188; 875de2362d3Smrg sq_conf.num_vs_threads = 60; 876de2362d3Smrg sq_conf.num_gs_threads = 0; 877de2362d3Smrg sq_conf.num_es_threads = 0; 878de2362d3Smrg sq_conf.num_ps_stack_entries = 128; 879de2362d3Smrg sq_conf.num_vs_stack_entries = 128; 880de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 881de2362d3Smrg sq_conf.num_es_stack_entries = 0; 882de2362d3Smrg break; 883de2362d3Smrg case CHIP_FAMILY_RV710: 884de2362d3Smrg sq_conf.num_ps_gprs = 192; 885de2362d3Smrg sq_conf.num_vs_gprs = 56; 886de2362d3Smrg sq_conf.num_temp_gprs = 4; 887de2362d3Smrg sq_conf.num_gs_gprs = 0; 888de2362d3Smrg sq_conf.num_es_gprs = 0; 889de2362d3Smrg sq_conf.num_ps_threads = 144; 890de2362d3Smrg sq_conf.num_vs_threads = 48; 891de2362d3Smrg sq_conf.num_gs_threads = 0; 892de2362d3Smrg sq_conf.num_es_threads = 0; 893de2362d3Smrg sq_conf.num_ps_stack_entries = 128; 894de2362d3Smrg sq_conf.num_vs_stack_entries = 128; 895de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 896de2362d3Smrg sq_conf.num_es_stack_entries = 0; 897de2362d3Smrg break; 898de2362d3Smrg } 899de2362d3Smrg 900de2362d3Smrg r600_sq_setup(pScrn, &sq_conf); 901de2362d3Smrg 902de2362d3Smrg /* set fake reloc for unused depth */ 903de2362d3Smrg BEGIN_BATCH(3 + 2); 904de2362d3Smrg EREG(DB_DEPTH_INFO, 0); 905de2362d3Smrg RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 906de2362d3Smrg END_BATCH(); 907de2362d3Smrg 908de2362d3Smrg BEGIN_BATCH(80); 909de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) { 910de2362d3Smrg EREG(TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | 911de2362d3Smrg (28 << TD_FIFO_CREDIT_shift))); 912de2362d3Smrg EREG(VC_ENHANCE, 0); 913de2362d3Smrg EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 914de2362d3Smrg EREG(DB_DEBUG, 0x82000000); /* ? */ 915de2362d3Smrg EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 916de2362d3Smrg (16 << DEPTH_FLUSH_shift) | 917de2362d3Smrg (0 << FORCE_SUMMARIZE_shift) | 918de2362d3Smrg (4 << DEPTH_PENDING_FREE_shift) | 919de2362d3Smrg (16 << DEPTH_CACHELINE_FREE_shift) | 920de2362d3Smrg 0)); 921de2362d3Smrg } else { 922de2362d3Smrg EREG(TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) | 923de2362d3Smrg (28 << TD_FIFO_CREDIT_shift))); 924de2362d3Smrg EREG(VC_ENHANCE, 0); 925de2362d3Smrg EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit); 926de2362d3Smrg EREG(DB_DEBUG, 0); 927de2362d3Smrg EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 928de2362d3Smrg (16 << DEPTH_FLUSH_shift) | 929de2362d3Smrg (0 << FORCE_SUMMARIZE_shift) | 930de2362d3Smrg (4 << DEPTH_PENDING_FREE_shift) | 931de2362d3Smrg (4 << DEPTH_CACHELINE_FREE_shift) | 932de2362d3Smrg 0)); 933de2362d3Smrg } 934de2362d3Smrg 935de2362d3Smrg PACK0(SQ_VTX_BASE_VTX_LOC, 2); 936de2362d3Smrg E32(0); 937de2362d3Smrg E32(0); 938de2362d3Smrg 939de2362d3Smrg PACK0(SQ_ESGS_RING_ITEMSIZE, 9); 940de2362d3Smrg E32(0); // SQ_ESGS_RING_ITEMSIZE 941de2362d3Smrg E32(0); // SQ_GSVS_RING_ITEMSIZE 942de2362d3Smrg E32(0); // SQ_ESTMP_RING_ITEMSIZE 943de2362d3Smrg E32(0); // SQ_GSTMP_RING_ITEMSIZE 944de2362d3Smrg E32(0); // SQ_VSTMP_RING_ITEMSIZE 945de2362d3Smrg E32(0); // SQ_PSTMP_RING_ITEMSIZE 946de2362d3Smrg E32(0); // SQ_FBUF_RING_ITEMSIZE 947de2362d3Smrg E32(0); // SQ_REDUC_RING_ITEMSIZE 948de2362d3Smrg E32(0); // SQ_GS_VERT_ITEMSIZE 949de2362d3Smrg 950de2362d3Smrg // DB 951de2362d3Smrg EREG(DB_DEPTH_CONTROL, 0); 952de2362d3Smrg PACK0(DB_RENDER_CONTROL, 2); 953de2362d3Smrg E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); 954de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 955de2362d3Smrg E32(FORCE_SHADER_Z_ORDER_bit); 956de2362d3Smrg else 957de2362d3Smrg E32(0); 958de2362d3Smrg EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | 959de2362d3Smrg (2 << ALPHA_TO_MASK_OFFSET1_shift) | 960de2362d3Smrg (2 << ALPHA_TO_MASK_OFFSET2_shift) | 961de2362d3Smrg (2 << ALPHA_TO_MASK_OFFSET3_shift))); 962de2362d3Smrg EREG(DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ 963de2362d3Smrg DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 964de2362d3Smrg 965de2362d3Smrg PACK0(DB_STENCIL_CLEAR, 2); 966de2362d3Smrg E32(0); // DB_STENCIL_CLEAR 967de2362d3Smrg E32(0); // DB_DEPTH_CLEAR 968de2362d3Smrg 969de2362d3Smrg PACK0(DB_STENCILREFMASK, 3); 970de2362d3Smrg E32(0); // DB_STENCILREFMASK 971de2362d3Smrg E32(0); // DB_STENCILREFMASK_BF 972de2362d3Smrg E32(0); // SX_ALPHA_REF 973de2362d3Smrg 974de2362d3Smrg PACK0(CB_CLRCMP_CONTROL, 4); 975de2362d3Smrg E32(1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC 976de2362d3Smrg E32(0); // CB_CLRCMP_SRC 977de2362d3Smrg E32(0); // CB_CLRCMP_DST 978de2362d3Smrg E32(0); // CB_CLRCMP_MSK 979de2362d3Smrg 980de2362d3Smrg EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask); 981de2362d3Smrg EREG(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); 982de2362d3Smrg 983de2362d3Smrg PACK0(SX_ALPHA_TEST_CONTROL, 5); 984de2362d3Smrg E32(0); // SX_ALPHA_TEST_CONTROL 985de2362d3Smrg E32(0x00000000); // CB_BLEND_RED 986de2362d3Smrg E32(0x00000000); // CB_BLEND_GREEN 987de2362d3Smrg E32(0x00000000); // CB_BLEND_BLUE 988de2362d3Smrg E32(0x00000000); // CB_BLEND_ALPHA 989de2362d3Smrg 990de2362d3Smrg EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | 991de2362d3Smrg (0 << WINDOW_Y_OFFSET_shift))); 992de2362d3Smrg 993de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 994de2362d3Smrg EREG(R7xx_PA_SC_EDGERULE, 0x00000000); 995de2362d3Smrg else 996de2362d3Smrg EREG(R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); 997de2362d3Smrg 998de2362d3Smrg EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); 999de2362d3Smrg 1000de2362d3Smrg END_BATCH(); 1001de2362d3Smrg 1002de2362d3Smrg /* clip boolean is set to always visible -> doesn't matter */ 1003de2362d3Smrg for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) 1004de2362d3Smrg r600_set_clip_rect(pScrn, i, 0, 0, 8192, 8192); 1005de2362d3Smrg 1006de2362d3Smrg for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) 1007de2362d3Smrg r600_set_vport_scissor(pScrn, i, 0, 0, 8192, 8192); 1008de2362d3Smrg 1009de2362d3Smrg BEGIN_BATCH(49); 1010de2362d3Smrg PACK0(PA_SC_MPASS_PS_CNTL, 2); 1011de2362d3Smrg E32(0); 1012de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 1013de2362d3Smrg E32((WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); 1014de2362d3Smrg else 1015de2362d3Smrg E32((FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | 1016de2362d3Smrg 0x00500000)); /* ? */ 1017de2362d3Smrg 1018de2362d3Smrg PACK0(PA_SC_LINE_CNTL, 9); 1019de2362d3Smrg E32(0); // PA_SC_LINE_CNTL 1020de2362d3Smrg E32(0); // PA_SC_AA_CONFIG 1021de2362d3Smrg E32(((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL 1022de2362d3Smrg (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */ 1023de2362d3Smrg EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ 1024de2362d3Smrg EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ 1025de2362d3Smrg EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ 1026de2362d3Smrg EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ 1027de2362d3Smrg E32(0); // PA_SC_AA_SAMPLE_LOCS_MCTX 1028de2362d3Smrg E32(0); // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M 1029de2362d3Smrg 1030de2362d3Smrg EREG(PA_SC_AA_MASK, 0xFFFFFFFF); 1031de2362d3Smrg 1032de2362d3Smrg PACK0(PA_CL_CLIP_CNTL, 5); 1033de2362d3Smrg E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL 1034de2362d3Smrg E32(FACE_bit); // PA_SU_SC_MODE_CNTL 1035de2362d3Smrg E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL 1036de2362d3Smrg E32(0); // PA_CL_VS_OUT_CNTL 1037de2362d3Smrg E32(0); // PA_CL_NANINF_CNTL 1038de2362d3Smrg 1039de2362d3Smrg PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); 1040de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL 1041de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_CLAMP 1042de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_FRONT_SCALE 1043de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_FRONT_OFFSET 1044de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_BACK_SCALE 1045de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_BACK_OFFSET 1046de2362d3Smrg 1047de2362d3Smrg // SPI 1048de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 1049de2362d3Smrg EREG(R7xx_SPI_THREAD_GROUPING, 0); 1050de2362d3Smrg else 1051de2362d3Smrg EREG(R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift)); 1052de2362d3Smrg 1053de2362d3Smrg /* default Interpolator setup */ 1054de2362d3Smrg EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | 1055de2362d3Smrg (1 << SEMANTIC_1_shift))); 1056de2362d3Smrg PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2); 1057de2362d3Smrg /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ 1058de2362d3Smrg E32(((0 << SEMANTIC_shift) | 1059de2362d3Smrg (0x01 << DEFAULT_VAL_shift) | 1060de2362d3Smrg SEL_CENTROID_bit)); 1061de2362d3Smrg /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ 1062de2362d3Smrg E32(((1 << SEMANTIC_shift) | 1063de2362d3Smrg (0x01 << DEFAULT_VAL_shift) | 1064de2362d3Smrg SEL_CENTROID_bit)); 1065de2362d3Smrg 1066de2362d3Smrg PACK0(SPI_INPUT_Z, 4); 1067de2362d3Smrg E32(0); // SPI_INPUT_Z 1068de2362d3Smrg E32(0); // SPI_FOG_CNTL 1069de2362d3Smrg E32(0); // SPI_FOG_FUNC_SCALE 1070de2362d3Smrg E32(0); // SPI_FOG_FUNC_BIAS 1071de2362d3Smrg 1072de2362d3Smrg END_BATCH(); 1073de2362d3Smrg 1074de2362d3Smrg // clear FS 1075de2362d3Smrg fs_conf.bo = accel_state->shaders_bo; 1076de2362d3Smrg r600_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM); 1077de2362d3Smrg 1078de2362d3Smrg // VGT 1079de2362d3Smrg BEGIN_BATCH(46); 1080de2362d3Smrg PACK0(VGT_MAX_VTX_INDX, 4); 1081de2362d3Smrg E32(0xffffff); // VGT_MAX_VTX_INDX 1082de2362d3Smrg E32(0); // VGT_MIN_VTX_INDX 1083de2362d3Smrg E32(0); // VGT_INDX_OFFSET 1084de2362d3Smrg E32(0); // VGT_MULTI_PRIM_IB_RESET_INDX 1085de2362d3Smrg 1086de2362d3Smrg EREG(VGT_PRIMITIVEID_EN, 0); 1087de2362d3Smrg EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0); 1088de2362d3Smrg 1089de2362d3Smrg PACK0(VGT_INSTANCE_STEP_RATE_0, 2); 1090de2362d3Smrg E32(0); // VGT_INSTANCE_STEP_RATE_0 1091de2362d3Smrg E32(0); // VGT_INSTANCE_STEP_RATE_1 1092de2362d3Smrg 1093de2362d3Smrg PACK0(PA_SU_POINT_SIZE, 17); 1094de2362d3Smrg E32(0); // PA_SU_POINT_SIZE 1095de2362d3Smrg E32(0); // PA_SU_POINT_MINMAX 1096de2362d3Smrg E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL 1097de2362d3Smrg E32(0); // PA_SC_LINE_STIPPLE 1098de2362d3Smrg E32(0); // VGT_OUTPUT_PATH_CNTL 1099de2362d3Smrg E32(0); // VGT_HOS_CNTL 1100de2362d3Smrg E32(0); // VGT_HOS_MAX_TESS_LEVEL 1101de2362d3Smrg E32(0); // VGT_HOS_MIN_TESS_LEVEL 1102de2362d3Smrg E32(0); // VGT_HOS_REUSE_DEPTH 1103de2362d3Smrg E32(0); // VGT_GROUP_PRIM_TYPE 1104de2362d3Smrg E32(0); // VGT_GROUP_FIRST_DECR 1105de2362d3Smrg E32(0); // VGT_GROUP_DECR 1106de2362d3Smrg E32(0); // VGT_GROUP_VECT_0_CNTL 1107de2362d3Smrg E32(0); // VGT_GROUP_VECT_1_CNTL 1108de2362d3Smrg E32(0); // VGT_GROUP_VECT_0_FMT_CNTL 1109de2362d3Smrg E32(0); // VGT_GROUP_VECT_1_FMT_CNTL 1110de2362d3Smrg E32(0); // VGT_GS_MODE 1111de2362d3Smrg 1112de2362d3Smrg PACK0(VGT_STRMOUT_EN, 3); 1113de2362d3Smrg E32(0); // VGT_STRMOUT_EN 1114de2362d3Smrg E32(0); // VGT_REUSE_OFF 1115de2362d3Smrg E32(0); // VGT_VTX_CNT_EN 1116de2362d3Smrg 1117de2362d3Smrg EREG(VGT_STRMOUT_BUFFER_EN, 0); 1118de2362d3Smrg EREG(SX_MISC, 0); 1119de2362d3Smrg END_BATCH(); 1120de2362d3Smrg} 1121de2362d3Smrg 1122de2362d3Smrg 1123de2362d3Smrg/* 1124de2362d3Smrg * Commands 1125de2362d3Smrg */ 1126de2362d3Smrg 1127de2362d3Smrgvoid 1128de2362d3Smrgr600_draw_immd(ScrnInfoPtr pScrn, draw_config_t *draw_conf, uint32_t *indices) 1129de2362d3Smrg{ 1130de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1131de2362d3Smrg uint32_t i, count; 1132de2362d3Smrg 1133de2362d3Smrg // calculate num of packets 1134de2362d3Smrg count = 2; 1135de2362d3Smrg if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) 1136de2362d3Smrg count += (draw_conf->num_indices + 1) / 2; 1137de2362d3Smrg else 1138de2362d3Smrg count += draw_conf->num_indices; 1139de2362d3Smrg 1140de2362d3Smrg BEGIN_BATCH(8 + count); 1141de2362d3Smrg EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1142de2362d3Smrg PACK3(IT_INDEX_TYPE, 1); 1143de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 1144de2362d3Smrg E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1145de2362d3Smrg#else 1146de2362d3Smrg E32(draw_conf->index_type); 1147de2362d3Smrg#endif 1148de2362d3Smrg PACK3(IT_NUM_INSTANCES, 1); 1149de2362d3Smrg E32(draw_conf->num_instances); 1150de2362d3Smrg 1151de2362d3Smrg PACK3(IT_DRAW_INDEX_IMMD, count); 1152de2362d3Smrg E32(draw_conf->num_indices); 1153de2362d3Smrg E32(draw_conf->vgt_draw_initiator); 1154de2362d3Smrg 1155de2362d3Smrg if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) { 1156de2362d3Smrg for (i = 0; i < draw_conf->num_indices; i += 2) { 1157de2362d3Smrg if ((i + 1) == draw_conf->num_indices) 1158de2362d3Smrg E32(indices[i]); 1159de2362d3Smrg else 1160de2362d3Smrg E32((indices[i] | (indices[i + 1] << 16))); 1161de2362d3Smrg } 1162de2362d3Smrg } else { 1163de2362d3Smrg for (i = 0; i < draw_conf->num_indices; i++) 1164de2362d3Smrg E32(indices[i]); 1165de2362d3Smrg } 1166de2362d3Smrg END_BATCH(); 1167de2362d3Smrg} 1168de2362d3Smrg 1169de2362d3Smrgvoid 1170de2362d3Smrgr600_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf) 1171de2362d3Smrg{ 1172de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1173de2362d3Smrg 1174de2362d3Smrg BEGIN_BATCH(10); 1175de2362d3Smrg EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1176de2362d3Smrg PACK3(IT_INDEX_TYPE, 1); 1177de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 1178de2362d3Smrg E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1179de2362d3Smrg#else 1180de2362d3Smrg E32(draw_conf->index_type); 1181de2362d3Smrg#endif 1182de2362d3Smrg PACK3(IT_NUM_INSTANCES, 1); 1183de2362d3Smrg E32(draw_conf->num_instances); 1184de2362d3Smrg PACK3(IT_DRAW_INDEX_AUTO, 2); 1185de2362d3Smrg E32(draw_conf->num_indices); 1186de2362d3Smrg E32(draw_conf->vgt_draw_initiator); 1187de2362d3Smrg END_BATCH(); 1188de2362d3Smrg} 1189de2362d3Smrg 1190de2362d3Smrgvoid r600_finish_op(ScrnInfoPtr pScrn, int vtx_size) 1191de2362d3Smrg{ 1192de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1193de2362d3Smrg struct radeon_accel_state *accel_state = info->accel_state; 1194de2362d3Smrg draw_config_t draw_conf; 1195de2362d3Smrg vtx_resource_t vtx_res; 1196de2362d3Smrg 1197de2362d3Smrg if (accel_state->vbo.vb_start_op == -1) 1198de2362d3Smrg return; 1199de2362d3Smrg 1200de2362d3Smrg CLEAR (draw_conf); 1201de2362d3Smrg CLEAR (vtx_res); 1202de2362d3Smrg 1203de2362d3Smrg if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) { 1204de2362d3Smrg R600IBDiscard(pScrn); 1205de2362d3Smrg return; 1206de2362d3Smrg } 1207de2362d3Smrg 1208de2362d3Smrg /* Vertex buffer setup */ 1209de2362d3Smrg accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op; 1210de2362d3Smrg vtx_res.id = SQ_VTX_RESOURCE_vs; 1211de2362d3Smrg vtx_res.vtx_size_dw = vtx_size / 4; 1212de2362d3Smrg vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; 1213de2362d3Smrg vtx_res.mem_req_size = 1; 1214de2362d3Smrg vtx_res.vb_addr = accel_state->vbo.vb_start_op; 1215de2362d3Smrg vtx_res.bo = accel_state->vbo.vb_bo; 1216de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 1217de2362d3Smrg vtx_res.endian = SQ_ENDIAN_8IN32; 1218de2362d3Smrg#endif 1219de2362d3Smrg r600_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT); 1220de2362d3Smrg 1221de2362d3Smrg /* Draw */ 1222de2362d3Smrg draw_conf.prim_type = DI_PT_RECTLIST; 1223de2362d3Smrg draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 1224de2362d3Smrg draw_conf.num_instances = 1; 1225de2362d3Smrg draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 1226de2362d3Smrg draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 1227de2362d3Smrg 1228de2362d3Smrg r600_draw_auto(pScrn, &draw_conf); 1229de2362d3Smrg 1230de2362d3Smrg /* XXX drm should handle this in fence submit */ 1231de2362d3Smrg r600_wait_3d_idle_clean(pScrn); 1232de2362d3Smrg 1233de2362d3Smrg /* sync dst surface */ 1234de2362d3Smrg r600_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 1235de2362d3Smrg accel_state->dst_size, 0, 1236de2362d3Smrg accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); 1237de2362d3Smrg 1238de2362d3Smrg accel_state->vbo.vb_start_op = -1; 1239de2362d3Smrg accel_state->ib_reset_op = 0; 1240de2362d3Smrg 1241de2362d3Smrg} 1242de2362d3Smrg 1243