r6xx_accel.c revision 7314432e
1de2362d3Smrg/* 2de2362d3Smrg * Copyright 2008 Advanced Micro Devices, Inc. 3de2362d3Smrg * 4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5de2362d3Smrg * copy of this software and associated documentation files (the "Software"), 6de2362d3Smrg * to deal in the Software without restriction, including without limitation 7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the 9de2362d3Smrg * Software is furnished to do so, subject to the following conditions: 10de2362d3Smrg * 11de2362d3Smrg * The above copyright notice and this permission notice (including the next 12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the 13de2362d3Smrg * Software. 14de2362d3Smrg * 15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21de2362d3Smrg * SOFTWARE. 22de2362d3Smrg * 23de2362d3Smrg * Authors: Alex Deucher <alexander.deucher@amd.com> 24de2362d3Smrg * Matthias Hopf <mhopf@suse.de> 25de2362d3Smrg */ 26de2362d3Smrg#ifdef HAVE_CONFIG_H 27de2362d3Smrg#include "config.h" 28de2362d3Smrg#endif 29de2362d3Smrg 30de2362d3Smrg#include "xf86.h" 31de2362d3Smrg 32de2362d3Smrg#include <errno.h> 33de2362d3Smrg 34de2362d3Smrg#include "radeon.h" 35de2362d3Smrg#include "r600_shader.h" 36de2362d3Smrg#include "radeon_reg.h" 37de2362d3Smrg#include "r600_reg.h" 38de2362d3Smrg#include "r600_state.h" 39de2362d3Smrg 40de2362d3Smrg#include "radeon_vbo.h" 41de2362d3Smrg#include "radeon_exa_shared.h" 42de2362d3Smrg 43de2362d3Smrgstatic const uint32_t R600_ROP[16] = { 44de2362d3Smrg RADEON_ROP3_ZERO, /* GXclear */ 45de2362d3Smrg RADEON_ROP3_DSa, /* Gxand */ 46de2362d3Smrg RADEON_ROP3_SDna, /* GXandReverse */ 47de2362d3Smrg RADEON_ROP3_S, /* GXcopy */ 48de2362d3Smrg RADEON_ROP3_DSna, /* GXandInverted */ 49de2362d3Smrg RADEON_ROP3_D, /* GXnoop */ 50de2362d3Smrg RADEON_ROP3_DSx, /* GXxor */ 51de2362d3Smrg RADEON_ROP3_DSo, /* GXor */ 52de2362d3Smrg RADEON_ROP3_DSon, /* GXnor */ 53de2362d3Smrg RADEON_ROP3_DSxn, /* GXequiv */ 54de2362d3Smrg RADEON_ROP3_Dn, /* GXinvert */ 55de2362d3Smrg RADEON_ROP3_SDno, /* GXorReverse */ 56de2362d3Smrg RADEON_ROP3_Sn, /* GXcopyInverted */ 57de2362d3Smrg RADEON_ROP3_DSno, /* GXorInverted */ 58de2362d3Smrg RADEON_ROP3_DSan, /* GXnand */ 59de2362d3Smrg RADEON_ROP3_ONE, /* GXset */ 60de2362d3Smrg}; 61de2362d3Smrg 62de2362d3Smrg/* we try and batch operations together under KMS - 63de2362d3Smrg but it doesn't work yet without misrendering */ 64de2362d3Smrg#define KMS_MULTI_OP 1 65de2362d3Smrg 66de2362d3Smrg/* Flush the indirect buffer to the kernel for submission to the card */ 67de2362d3Smrgvoid R600CPFlushIndirect(ScrnInfoPtr pScrn) 68de2362d3Smrg{ 69de2362d3Smrg radeon_cs_flush_indirect(pScrn); 70de2362d3Smrg} 71de2362d3Smrg 72de2362d3Smrgvoid R600IBDiscard(ScrnInfoPtr pScrn) 73de2362d3Smrg{ 74de2362d3Smrg radeon_ib_discard(pScrn); 75de2362d3Smrg} 76de2362d3Smrg 77de2362d3Smrgvoid 78de2362d3Smrgr600_wait_3d_idle_clean(ScrnInfoPtr pScrn) 79de2362d3Smrg{ 80de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 81de2362d3Smrg 82de2362d3Smrg //flush caches, don't generate timestamp 83de2362d3Smrg BEGIN_BATCH(5); 84de2362d3Smrg PACK3(IT_EVENT_WRITE, 1); 85de2362d3Smrg E32(CACHE_FLUSH_AND_INV_EVENT); 86de2362d3Smrg // wait for 3D idle clean 87de2362d3Smrg EREG(WAIT_UNTIL, (WAIT_3D_IDLE_bit | 88de2362d3Smrg WAIT_3D_IDLECLEAN_bit)); 89de2362d3Smrg END_BATCH(); 90de2362d3Smrg} 91de2362d3Smrg 92de2362d3Smrgvoid 93de2362d3Smrgr600_wait_3d_idle(ScrnInfoPtr pScrn) 94de2362d3Smrg{ 95de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 96de2362d3Smrg 97de2362d3Smrg BEGIN_BATCH(3); 98de2362d3Smrg EREG(WAIT_UNTIL, WAIT_3D_IDLE_bit); 99de2362d3Smrg END_BATCH(); 100de2362d3Smrg} 101de2362d3Smrg 102de2362d3Smrgvoid 103de2362d3Smrgr600_start_3d(ScrnInfoPtr pScrn) 104de2362d3Smrg{ 105de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 106de2362d3Smrg 107de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) { 108de2362d3Smrg BEGIN_BATCH(5); 109de2362d3Smrg PACK3(IT_START_3D_CMDBUF, 1); 110de2362d3Smrg E32(0); 111de2362d3Smrg } else 112de2362d3Smrg BEGIN_BATCH(3); 113de2362d3Smrg 114de2362d3Smrg PACK3(IT_CONTEXT_CONTROL, 2); 115de2362d3Smrg E32(0x80000000); 116de2362d3Smrg E32(0x80000000); 117de2362d3Smrg END_BATCH(); 118de2362d3Smrg 119de2362d3Smrg} 120de2362d3Smrg 121de2362d3Smrg/* 122de2362d3Smrg * Setup of functional groups 123de2362d3Smrg */ 124de2362d3Smrg 125de2362d3Smrg// asic stack/thread/gpr limits - need to query the drm 126de2362d3Smrgstatic void 127de2362d3Smrgr600_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf) 128de2362d3Smrg{ 129de2362d3Smrg uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 130de2362d3Smrg uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; 131de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 132de2362d3Smrg 133de2362d3Smrg if ((info->ChipFamily == CHIP_FAMILY_RV610) || 134de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV620) || 135de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS780) || 136de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS880) || 137de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV710)) 138de2362d3Smrg sq_config = 0; // no VC 139de2362d3Smrg else 140de2362d3Smrg sq_config = VC_ENABLE_bit; 141de2362d3Smrg 142de2362d3Smrg sq_config |= (DX9_CONSTS_bit | 143de2362d3Smrg ALU_INST_PREFER_VECTOR_bit | 144de2362d3Smrg (sq_conf->ps_prio << PS_PRIO_shift) | 145de2362d3Smrg (sq_conf->vs_prio << VS_PRIO_shift) | 146de2362d3Smrg (sq_conf->gs_prio << GS_PRIO_shift) | 147de2362d3Smrg (sq_conf->es_prio << ES_PRIO_shift)); 148de2362d3Smrg 149de2362d3Smrg sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 150de2362d3Smrg (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 151de2362d3Smrg (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 152de2362d3Smrg sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 153de2362d3Smrg (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 154de2362d3Smrg 155de2362d3Smrg sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 156de2362d3Smrg (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 157de2362d3Smrg (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 158de2362d3Smrg (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 159de2362d3Smrg 160de2362d3Smrg sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 161de2362d3Smrg (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 162de2362d3Smrg 163de2362d3Smrg sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 164de2362d3Smrg (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 165de2362d3Smrg 166de2362d3Smrg BEGIN_BATCH(8); 167de2362d3Smrg PACK0(SQ_CONFIG, 6); 168de2362d3Smrg E32(sq_config); 169de2362d3Smrg E32(sq_gpr_resource_mgmt_1); 170de2362d3Smrg E32(sq_gpr_resource_mgmt_2); 171de2362d3Smrg E32(sq_thread_resource_mgmt); 172de2362d3Smrg E32(sq_stack_resource_mgmt_1); 173de2362d3Smrg E32(sq_stack_resource_mgmt_2); 174de2362d3Smrg END_BATCH(); 175de2362d3Smrg} 176de2362d3Smrg 1777314432eSmrgvoid r600_set_blend_color(ScrnInfoPtr pScrn, float *color) 1787314432eSmrg{ 1797314432eSmrg RADEONInfoPtr info = RADEONPTR(pScrn); 1807314432eSmrg 1817314432eSmrg BEGIN_BATCH(2 + 4); 1827314432eSmrg PACK0(CB_BLEND_RED, 4); 1837314432eSmrg EFLOAT(color[0]); /* R */ 1847314432eSmrg EFLOAT(color[1]); /* G */ 1857314432eSmrg EFLOAT(color[2]); /* B */ 1867314432eSmrg EFLOAT(color[3]); /* A */ 1877314432eSmrg END_BATCH(); 1887314432eSmrg} 1897314432eSmrg 1907314432eSmrg 191de2362d3Smrgvoid 192de2362d3Smrgr600_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain) 193de2362d3Smrg{ 194de2362d3Smrg uint32_t cb_color_info, cb_color_control; 195de2362d3Smrg unsigned pitch, slice, h, array_mode; 196de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 197de2362d3Smrg 198de2362d3Smrg 199de2362d3Smrg if (cb_conf->surface) { 200de2362d3Smrg switch (cb_conf->surface->level[0].mode) { 201de2362d3Smrg case RADEON_SURF_MODE_1D: 202de2362d3Smrg array_mode = 2; 203de2362d3Smrg break; 204de2362d3Smrg case RADEON_SURF_MODE_2D: 205de2362d3Smrg array_mode = 4; 206de2362d3Smrg break; 207de2362d3Smrg default: 208de2362d3Smrg array_mode = 0; 209de2362d3Smrg break; 210de2362d3Smrg } 211de2362d3Smrg pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1; 212de2362d3Smrg slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1; 213de2362d3Smrg } else 214de2362d3Smrg { 215de2362d3Smrg array_mode = cb_conf->array_mode; 216de2362d3Smrg pitch = (cb_conf->w / 8) - 1; 217de2362d3Smrg h = RADEON_ALIGN(cb_conf->h, 8); 218de2362d3Smrg slice = ((cb_conf->w * h) / 64) - 1; 219de2362d3Smrg } 220de2362d3Smrg 221de2362d3Smrg cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 222de2362d3Smrg (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 223de2362d3Smrg (array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 224de2362d3Smrg (cb_conf->number_type << NUMBER_TYPE_shift) | 225de2362d3Smrg (cb_conf->comp_swap << COMP_SWAP_shift) | 226de2362d3Smrg (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); 227de2362d3Smrg if (cb_conf->read_size) 228de2362d3Smrg cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; 229de2362d3Smrg if (cb_conf->blend_clamp) 230de2362d3Smrg cb_color_info |= BLEND_CLAMP_bit; 231de2362d3Smrg if (cb_conf->clear_color) 232de2362d3Smrg cb_color_info |= CLEAR_COLOR_bit; 233de2362d3Smrg if (cb_conf->blend_bypass) 234de2362d3Smrg cb_color_info |= BLEND_BYPASS_bit; 235de2362d3Smrg if (cb_conf->blend_float32) 236de2362d3Smrg cb_color_info |= BLEND_FLOAT32_bit; 237de2362d3Smrg if (cb_conf->simple_float) 238de2362d3Smrg cb_color_info |= SIMPLE_FLOAT_bit; 239de2362d3Smrg if (cb_conf->round_mode) 240de2362d3Smrg cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 241de2362d3Smrg if (cb_conf->tile_compact) 242de2362d3Smrg cb_color_info |= TILE_COMPACT_bit; 243de2362d3Smrg if (cb_conf->source_format) 244de2362d3Smrg cb_color_info |= SOURCE_FORMAT_bit; 245de2362d3Smrg 246de2362d3Smrg BEGIN_BATCH(3 + 2); 247de2362d3Smrg EREG((CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); 248de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 249de2362d3Smrg END_BATCH(); 250de2362d3Smrg 251de2362d3Smrg // rv6xx workaround 252de2362d3Smrg if ((info->ChipFamily > CHIP_FAMILY_R600) && 253de2362d3Smrg (info->ChipFamily < CHIP_FAMILY_RV770)) { 254de2362d3Smrg BEGIN_BATCH(2); 255de2362d3Smrg PACK3(IT_SURFACE_BASE_UPDATE, 1); 256de2362d3Smrg E32((2 << cb_conf->id)); 257de2362d3Smrg END_BATCH(); 258de2362d3Smrg } 259de2362d3Smrg /* Set CMASK & TILE buffer to the offset of color buffer as 260de2362d3Smrg * we don't use those this shouldn't cause any issue and we 261de2362d3Smrg * then have a valid cmd stream 262de2362d3Smrg */ 263de2362d3Smrg BEGIN_BATCH(3 + 2); 264de2362d3Smrg EREG((CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 265de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 266de2362d3Smrg END_BATCH(); 267de2362d3Smrg BEGIN_BATCH(3 + 2); 268de2362d3Smrg EREG((CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 269de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 270de2362d3Smrg END_BATCH(); 271de2362d3Smrg BEGIN_BATCH(9); 272de2362d3Smrg // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib 273de2362d3Smrg EREG((CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | 274de2362d3Smrg (slice << SLICE_TILE_MAX_shift))); 275de2362d3Smrg EREG((CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | 276de2362d3Smrg (0 << SLICE_MAX_shift))); 277de2362d3Smrg EREG((CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | 278de2362d3Smrg (0 << FMASK_TILE_MAX_shift))); 279de2362d3Smrg END_BATCH(); 280de2362d3Smrg 281de2362d3Smrg BEGIN_BATCH(3 + 2); 282de2362d3Smrg EREG((CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); 283de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 284de2362d3Smrg END_BATCH(); 285de2362d3Smrg 286de2362d3Smrg BEGIN_BATCH(9); 287de2362d3Smrg EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift)); 288de2362d3Smrg cb_color_control = R600_ROP[cb_conf->rop] | 289de2362d3Smrg (cb_conf->blend_enable << TARGET_BLEND_ENABLE_shift); 290de2362d3Smrg if (info->ChipFamily == CHIP_FAMILY_R600) { 291de2362d3Smrg /* no per-MRT blend on R600 */ 292de2362d3Smrg EREG(CB_COLOR_CONTROL, cb_color_control); 293de2362d3Smrg EREG(CB_BLEND_CONTROL, cb_conf->blendcntl); 294de2362d3Smrg } else { 295de2362d3Smrg if (cb_conf->blend_enable) 296de2362d3Smrg cb_color_control |= PER_MRT_BLEND_bit; 297de2362d3Smrg EREG(CB_COLOR_CONTROL, cb_color_control); 298de2362d3Smrg EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); 299de2362d3Smrg } 300de2362d3Smrg END_BATCH(); 301de2362d3Smrg} 302de2362d3Smrg 303de2362d3Smrgstatic void 304de2362d3Smrgr600_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type, 305de2362d3Smrg uint32_t size, uint64_t mc_addr, 306de2362d3Smrg struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) 307de2362d3Smrg{ 308de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 309de2362d3Smrg uint32_t cp_coher_size; 310de2362d3Smrg if (size == 0xffffffff) 311de2362d3Smrg cp_coher_size = 0xffffffff; 312de2362d3Smrg else 313de2362d3Smrg cp_coher_size = ((size + 255) >> 8); 314de2362d3Smrg 315de2362d3Smrg BEGIN_BATCH(5 + 2); 316de2362d3Smrg PACK3(IT_SURFACE_SYNC, 4); 317de2362d3Smrg E32(sync_type); 318de2362d3Smrg E32(cp_coher_size); 319de2362d3Smrg E32((mc_addr >> 8)); 320de2362d3Smrg E32(10); /* poll interval */ 321de2362d3Smrg RELOC_BATCH(bo, rdomains, wdomain); 322de2362d3Smrg END_BATCH(); 323de2362d3Smrg} 324de2362d3Smrg 325de2362d3Smrg/* inserts a wait for vline in the command stream */ 326de2362d3Smrgvoid 327de2362d3Smrgr600_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, 328de2362d3Smrg xf86CrtcPtr crtc, int start, int stop) 329de2362d3Smrg{ 330de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 331de2362d3Smrg drmmode_crtc_private_ptr drmmode_crtc; 332de2362d3Smrg 333de2362d3Smrg if (!crtc) 334de2362d3Smrg return; 335de2362d3Smrg 336de2362d3Smrg if (!crtc->enabled) 337de2362d3Smrg return; 338de2362d3Smrg 339de2362d3Smrg if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) 340de2362d3Smrg return; 341de2362d3Smrg 342de2362d3Smrg start = max(start, crtc->y); 343de2362d3Smrg stop = min(stop, crtc->y + crtc->mode.VDisplay); 344de2362d3Smrg 345de2362d3Smrg if (start >= stop) 346de2362d3Smrg return; 347de2362d3Smrg 348de2362d3Smrg drmmode_crtc = crtc->driver_private; 349de2362d3Smrg 350de2362d3Smrg BEGIN_BATCH(11); 351de2362d3Smrg /* set the VLINE range */ 352de2362d3Smrg EREG(AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */ 353de2362d3Smrg (start << AVIVO_D1MODE_VLINE_START_SHIFT) | 354de2362d3Smrg (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); 355de2362d3Smrg 356de2362d3Smrg /* tell the CP to poll the VLINE state register */ 357de2362d3Smrg PACK3(IT_WAIT_REG_MEM, 6); 358de2362d3Smrg E32(IT_WAIT_REG | IT_WAIT_EQ); 359de2362d3Smrg E32(IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS)); 360de2362d3Smrg E32(0); 361de2362d3Smrg E32(0); // Ref value 362de2362d3Smrg E32(AVIVO_D1MODE_VLINE_STAT); // Mask 363de2362d3Smrg E32(10); // Wait interval 364de2362d3Smrg /* add crtc reloc */ 365de2362d3Smrg PACK3(IT_NOP, 1); 366de2362d3Smrg E32(drmmode_crtc->mode_crtc->crtc_id); 367de2362d3Smrg END_BATCH(); 368de2362d3Smrg} 369de2362d3Smrg 370de2362d3Smrgvoid 371de2362d3Smrgr600_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp) 372de2362d3Smrg{ 373de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 374de2362d3Smrg 375de2362d3Smrg BEGIN_BATCH(8); 376de2362d3Smrg /* Interpolator setup */ 377de2362d3Smrg EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift)); 378de2362d3Smrg PACK0(SPI_PS_IN_CONTROL_0, 3); 379de2362d3Smrg E32((num_interp << NUM_INTERP_shift)); 380de2362d3Smrg E32(0); 381de2362d3Smrg E32(0); 382de2362d3Smrg END_BATCH(); 383de2362d3Smrg} 384de2362d3Smrg 385de2362d3Smrgvoid 386de2362d3Smrgr600_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain) 387de2362d3Smrg{ 388de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 389de2362d3Smrg uint32_t sq_pgm_resources; 390de2362d3Smrg 391de2362d3Smrg sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 392de2362d3Smrg (fs_conf->stack_size << STACK_SIZE_shift)); 393de2362d3Smrg 394de2362d3Smrg if (fs_conf->dx10_clamp) 395de2362d3Smrg sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; 396de2362d3Smrg 397de2362d3Smrg BEGIN_BATCH(3 + 2); 398de2362d3Smrg EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 399de2362d3Smrg RELOC_BATCH(fs_conf->bo, domain, 0); 400de2362d3Smrg END_BATCH(); 401de2362d3Smrg 402de2362d3Smrg BEGIN_BATCH(6); 403de2362d3Smrg EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources); 404de2362d3Smrg EREG(SQ_PGM_CF_OFFSET_FS, 0); 405de2362d3Smrg END_BATCH(); 406de2362d3Smrg} 407de2362d3Smrg 408de2362d3Smrgvoid 409de2362d3Smrgr600_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain) 410de2362d3Smrg{ 411de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 412de2362d3Smrg uint32_t sq_pgm_resources; 413de2362d3Smrg 414de2362d3Smrg sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 415de2362d3Smrg (vs_conf->stack_size << STACK_SIZE_shift)); 416de2362d3Smrg 417de2362d3Smrg if (vs_conf->dx10_clamp) 418de2362d3Smrg sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; 419de2362d3Smrg if (vs_conf->fetch_cache_lines) 420de2362d3Smrg sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 421de2362d3Smrg if (vs_conf->uncached_first_inst) 422de2362d3Smrg sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 423de2362d3Smrg 424de2362d3Smrg /* flush SQ cache */ 425de2362d3Smrg r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 426de2362d3Smrg vs_conf->shader_size, vs_conf->shader_addr, 427de2362d3Smrg vs_conf->bo, domain, 0); 428de2362d3Smrg 429de2362d3Smrg BEGIN_BATCH(3 + 2); 430de2362d3Smrg EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 431de2362d3Smrg RELOC_BATCH(vs_conf->bo, domain, 0); 432de2362d3Smrg END_BATCH(); 433de2362d3Smrg 434de2362d3Smrg BEGIN_BATCH(6); 435de2362d3Smrg EREG(SQ_PGM_RESOURCES_VS, sq_pgm_resources); 436de2362d3Smrg EREG(SQ_PGM_CF_OFFSET_VS, 0); 437de2362d3Smrg END_BATCH(); 438de2362d3Smrg} 439de2362d3Smrg 440de2362d3Smrgvoid 441de2362d3Smrgr600_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain) 442de2362d3Smrg{ 443de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 444de2362d3Smrg uint32_t sq_pgm_resources; 445de2362d3Smrg 446de2362d3Smrg sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 447de2362d3Smrg (ps_conf->stack_size << STACK_SIZE_shift)); 448de2362d3Smrg 449de2362d3Smrg if (ps_conf->dx10_clamp) 450de2362d3Smrg sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; 451de2362d3Smrg if (ps_conf->fetch_cache_lines) 452de2362d3Smrg sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 453de2362d3Smrg if (ps_conf->uncached_first_inst) 454de2362d3Smrg sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 455de2362d3Smrg if (ps_conf->clamp_consts) 456de2362d3Smrg sq_pgm_resources |= CLAMP_CONSTS_bit; 457de2362d3Smrg 458de2362d3Smrg /* flush SQ cache */ 459de2362d3Smrg r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 460de2362d3Smrg ps_conf->shader_size, ps_conf->shader_addr, 461de2362d3Smrg ps_conf->bo, domain, 0); 462de2362d3Smrg 463de2362d3Smrg BEGIN_BATCH(3 + 2); 464de2362d3Smrg EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 465de2362d3Smrg RELOC_BATCH(ps_conf->bo, domain, 0); 466de2362d3Smrg END_BATCH(); 467de2362d3Smrg 468de2362d3Smrg BEGIN_BATCH(9); 469de2362d3Smrg EREG(SQ_PGM_RESOURCES_PS, sq_pgm_resources); 470de2362d3Smrg EREG(SQ_PGM_EXPORTS_PS, ps_conf->export_mode); 471de2362d3Smrg EREG(SQ_PGM_CF_OFFSET_PS, 0); 472de2362d3Smrg END_BATCH(); 473de2362d3Smrg} 474de2362d3Smrg 475de2362d3Smrgvoid 476de2362d3Smrgr600_set_alu_consts(ScrnInfoPtr pScrn, int offset, int count, float *const_buf) 477de2362d3Smrg{ 478de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 479de2362d3Smrg int i; 480de2362d3Smrg const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); 481de2362d3Smrg 482de2362d3Smrg BEGIN_BATCH(2 + countreg); 483de2362d3Smrg PACK0(SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); 484de2362d3Smrg for (i = 0; i < countreg; i++) 485de2362d3Smrg EFLOAT(const_buf[i]); 486de2362d3Smrg END_BATCH(); 487de2362d3Smrg} 488de2362d3Smrg 489de2362d3Smrgvoid 490de2362d3Smrgr600_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val) 491de2362d3Smrg{ 492de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 493de2362d3Smrg /* bool register order is: ps, vs, gs; one register each 494de2362d3Smrg * 1 bits per bool; 32 bools each for ps, vs, gs. 495de2362d3Smrg */ 496de2362d3Smrg BEGIN_BATCH(3); 497de2362d3Smrg EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 498de2362d3Smrg END_BATCH(); 499de2362d3Smrg} 500de2362d3Smrg 501de2362d3Smrgstatic void 502de2362d3Smrgr600_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain) 503de2362d3Smrg{ 504de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 505de2362d3Smrg struct radeon_accel_state *accel_state = info->accel_state; 506de2362d3Smrg uint32_t sq_vtx_constant_word2; 507de2362d3Smrg 508de2362d3Smrg sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 509de2362d3Smrg ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 510de2362d3Smrg (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 511de2362d3Smrg (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 512de2362d3Smrg (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 513de2362d3Smrg if (res->clamp_x) 514de2362d3Smrg sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 515de2362d3Smrg 516de2362d3Smrg if (res->format_comp_all) 517de2362d3Smrg sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 518de2362d3Smrg 519de2362d3Smrg if (res->srf_mode_all) 520de2362d3Smrg sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 521de2362d3Smrg 522de2362d3Smrg /* flush vertex cache */ 523de2362d3Smrg if ((info->ChipFamily == CHIP_FAMILY_RV610) || 524de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV620) || 525de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS780) || 526de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS880) || 527de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV710)) 528de2362d3Smrg r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 529de2362d3Smrg accel_state->vbo.vb_offset, 0, 530de2362d3Smrg res->bo, 531de2362d3Smrg domain, 0); 532de2362d3Smrg else 533de2362d3Smrg r600_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit, 534de2362d3Smrg accel_state->vbo.vb_offset, 0, 535de2362d3Smrg res->bo, 536de2362d3Smrg domain, 0); 537de2362d3Smrg 538de2362d3Smrg BEGIN_BATCH(9 + 2); 539de2362d3Smrg PACK0(SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); 540de2362d3Smrg E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 541de2362d3Smrg E32((res->vtx_num_entries << 2) - 1); // 1: SIZE 542de2362d3Smrg E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 543de2362d3Smrg E32(res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? 544de2362d3Smrg E32(0); // 4: n/a 545de2362d3Smrg E32(0); // 5: n/a 546de2362d3Smrg E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE 547de2362d3Smrg RELOC_BATCH(res->bo, domain, 0); 548de2362d3Smrg END_BATCH(); 549de2362d3Smrg} 550de2362d3Smrg 551de2362d3Smrgvoid 552de2362d3Smrgr600_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain) 553de2362d3Smrg{ 554de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 555de2362d3Smrg uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 556de2362d3Smrg uint32_t sq_tex_resource_word5, sq_tex_resource_word6; 557de2362d3Smrg uint32_t array_mode, pitch; 558de2362d3Smrg 559de2362d3Smrg if (tex_res->surface) { 560de2362d3Smrg switch (tex_res->surface->level[0].mode) { 561de2362d3Smrg case RADEON_SURF_MODE_1D: 562de2362d3Smrg array_mode = 2; 563de2362d3Smrg break; 564de2362d3Smrg case RADEON_SURF_MODE_2D: 565de2362d3Smrg array_mode = 4; 566de2362d3Smrg break; 567de2362d3Smrg default: 568de2362d3Smrg array_mode = 0; 569de2362d3Smrg break; 570de2362d3Smrg } 571de2362d3Smrg pitch = tex_res->surface->level[0].nblk_x >> 3; 572de2362d3Smrg } else 573de2362d3Smrg { 574de2362d3Smrg array_mode = tex_res->tile_mode; 575de2362d3Smrg pitch = (tex_res->pitch + 7) >> 3; 576de2362d3Smrg } 577de2362d3Smrg 578de2362d3Smrg sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | 579de2362d3Smrg (array_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); 580de2362d3Smrg 581de2362d3Smrg if (tex_res->w) 582de2362d3Smrg sq_tex_resource_word0 |= (((pitch - 1) << PITCH_shift) | 583de2362d3Smrg ((tex_res->w - 1) << TEX_WIDTH_shift)); 584de2362d3Smrg 585de2362d3Smrg if (tex_res->tile_type) 586de2362d3Smrg sq_tex_resource_word0 |= TILE_TYPE_bit; 587de2362d3Smrg 588de2362d3Smrg sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); 589de2362d3Smrg 590de2362d3Smrg if (tex_res->h) 591de2362d3Smrg sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 592de2362d3Smrg if (tex_res->depth) 593de2362d3Smrg sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 594de2362d3Smrg 595de2362d3Smrg sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 596de2362d3Smrg (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 597de2362d3Smrg (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 598de2362d3Smrg (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 599de2362d3Smrg (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 600de2362d3Smrg (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 601de2362d3Smrg (tex_res->request_size << REQUEST_SIZE_shift) | 602de2362d3Smrg (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 603de2362d3Smrg (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 604de2362d3Smrg (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 605de2362d3Smrg (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 606de2362d3Smrg (tex_res->base_level << BASE_LEVEL_shift)); 607de2362d3Smrg 608de2362d3Smrg if (tex_res->srf_mode_all) 609de2362d3Smrg sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 610de2362d3Smrg if (tex_res->force_degamma) 611de2362d3Smrg sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 612de2362d3Smrg 613de2362d3Smrg sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 614de2362d3Smrg (tex_res->base_array << BASE_ARRAY_shift) | 615de2362d3Smrg (tex_res->last_array << LAST_ARRAY_shift)); 616de2362d3Smrg 617de2362d3Smrg sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | 618de2362d3Smrg (tex_res->perf_modulation << PERF_MODULATION_shift) | 619de2362d3Smrg (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); 620de2362d3Smrg 621de2362d3Smrg if (tex_res->interlaced) 622de2362d3Smrg sq_tex_resource_word6 |= INTERLACED_bit; 623de2362d3Smrg 624de2362d3Smrg /* flush texture cache */ 625de2362d3Smrg r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 626de2362d3Smrg tex_res->size, tex_res->base, 627de2362d3Smrg tex_res->bo, domain, 0); 628de2362d3Smrg 629de2362d3Smrg BEGIN_BATCH(9 + 4); 630de2362d3Smrg PACK0(SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); 631de2362d3Smrg E32(sq_tex_resource_word0); 632de2362d3Smrg E32(sq_tex_resource_word1); 633de2362d3Smrg E32(((tex_res->base) >> 8)); 634de2362d3Smrg E32(((tex_res->mip_base) >> 8)); 635de2362d3Smrg E32(sq_tex_resource_word4); 636de2362d3Smrg E32(sq_tex_resource_word5); 637de2362d3Smrg E32(sq_tex_resource_word6); 638de2362d3Smrg RELOC_BATCH(tex_res->bo, domain, 0); 639de2362d3Smrg RELOC_BATCH(tex_res->mip_bo, domain, 0); 640de2362d3Smrg END_BATCH(); 641de2362d3Smrg} 642de2362d3Smrg 643de2362d3Smrgvoid 644de2362d3Smrgr600_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s) 645de2362d3Smrg{ 646de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 647de2362d3Smrg uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 648de2362d3Smrg 649de2362d3Smrg sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 650de2362d3Smrg (s->clamp_y << CLAMP_Y_shift) | 651de2362d3Smrg (s->clamp_z << CLAMP_Z_shift) | 652de2362d3Smrg (s->xy_mag_filter << XY_MAG_FILTER_shift) | 653de2362d3Smrg (s->xy_min_filter << XY_MIN_FILTER_shift) | 654de2362d3Smrg (s->z_filter << Z_FILTER_shift) | 655de2362d3Smrg (s->mip_filter << MIP_FILTER_shift) | 656de2362d3Smrg (s->border_color << BORDER_COLOR_TYPE_shift) | 657de2362d3Smrg (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 658de2362d3Smrg (s->chroma_key << CHROMA_KEY_shift)); 659de2362d3Smrg if (s->point_sampling_clamp) 660de2362d3Smrg sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; 661de2362d3Smrg if (s->tex_array_override) 662de2362d3Smrg sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; 663de2362d3Smrg if (s->lod_uses_minor_axis) 664de2362d3Smrg sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; 665de2362d3Smrg 666de2362d3Smrg sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | 667de2362d3Smrg (s->max_lod << MAX_LOD_shift) | 668de2362d3Smrg (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); 669de2362d3Smrg 670de2362d3Smrg sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | 671de2362d3Smrg (s->perf_mip << PERF_MIP_shift) | 672de2362d3Smrg (s->perf_z << PERF_Z_shift)); 673de2362d3Smrg if (s->mc_coord_truncate) 674de2362d3Smrg sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 675de2362d3Smrg if (s->force_degamma) 676de2362d3Smrg sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 677de2362d3Smrg if (s->high_precision_filter) 678de2362d3Smrg sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; 679de2362d3Smrg if (s->fetch_4) 680de2362d3Smrg sq_tex_sampler_word2 |= FETCH_4_bit; 681de2362d3Smrg if (s->sample_is_pcf) 682de2362d3Smrg sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; 683de2362d3Smrg if (s->type) 684de2362d3Smrg sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 685de2362d3Smrg 686de2362d3Smrg BEGIN_BATCH(5); 687de2362d3Smrg PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); 688de2362d3Smrg E32(sq_tex_sampler_word0); 689de2362d3Smrg E32(sq_tex_sampler_word1); 690de2362d3Smrg E32(sq_tex_sampler_word2); 691de2362d3Smrg END_BATCH(); 692de2362d3Smrg} 693de2362d3Smrg 694de2362d3Smrg//XXX deal with clip offsets in clip setup 695de2362d3Smrgvoid 696de2362d3Smrgr600_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 697de2362d3Smrg{ 698de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 699de2362d3Smrg 700de2362d3Smrg BEGIN_BATCH(4); 701de2362d3Smrg PACK0(PA_SC_SCREEN_SCISSOR_TL, 2); 702de2362d3Smrg E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 703de2362d3Smrg (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 704de2362d3Smrg E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | 705de2362d3Smrg (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 706de2362d3Smrg END_BATCH(); 707de2362d3Smrg} 708de2362d3Smrg 709de2362d3Smrgvoid 710de2362d3Smrgr600_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 711de2362d3Smrg{ 712de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 713de2362d3Smrg 714de2362d3Smrg BEGIN_BATCH(4); 715de2362d3Smrg PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); 716de2362d3Smrg E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 717de2362d3Smrg (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 718de2362d3Smrg WINDOW_OFFSET_DISABLE_bit)); 719de2362d3Smrg E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 720de2362d3Smrg (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 721de2362d3Smrg END_BATCH(); 722de2362d3Smrg} 723de2362d3Smrg 724de2362d3Smrgvoid 725de2362d3Smrgr600_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 726de2362d3Smrg{ 727de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 728de2362d3Smrg 729de2362d3Smrg BEGIN_BATCH(4); 730de2362d3Smrg PACK0(PA_SC_GENERIC_SCISSOR_TL, 2); 731de2362d3Smrg E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 732de2362d3Smrg (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 733de2362d3Smrg WINDOW_OFFSET_DISABLE_bit)); 734de2362d3Smrg E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 735de2362d3Smrg (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 736de2362d3Smrg END_BATCH(); 737de2362d3Smrg} 738de2362d3Smrg 739de2362d3Smrgvoid 740de2362d3Smrgr600_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 741de2362d3Smrg{ 742de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 743de2362d3Smrg 744de2362d3Smrg BEGIN_BATCH(4); 745de2362d3Smrg PACK0(PA_SC_WINDOW_SCISSOR_TL, 2); 746de2362d3Smrg E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 747de2362d3Smrg (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 748de2362d3Smrg WINDOW_OFFSET_DISABLE_bit)); 749de2362d3Smrg E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 750de2362d3Smrg (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 751de2362d3Smrg END_BATCH(); 752de2362d3Smrg} 753de2362d3Smrg 754de2362d3Smrgvoid 755de2362d3Smrgr600_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 756de2362d3Smrg{ 757de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 758de2362d3Smrg 759de2362d3Smrg BEGIN_BATCH(4); 760de2362d3Smrg PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2); 761de2362d3Smrg E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 762de2362d3Smrg (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 763de2362d3Smrg E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | 764de2362d3Smrg (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 765de2362d3Smrg END_BATCH(); 766de2362d3Smrg} 767de2362d3Smrg 768de2362d3Smrg/* 769de2362d3Smrg * Setup of default state 770de2362d3Smrg */ 771de2362d3Smrg 772de2362d3Smrgvoid 773de2362d3Smrgr600_set_default_state(ScrnInfoPtr pScrn) 774de2362d3Smrg{ 775de2362d3Smrg tex_resource_t tex_res; 776de2362d3Smrg shader_config_t fs_conf; 777de2362d3Smrg sq_config_t sq_conf; 778de2362d3Smrg int i; 779de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 780de2362d3Smrg struct radeon_accel_state *accel_state = info->accel_state; 781de2362d3Smrg 782de2362d3Smrg if (accel_state->XInited3D) 783de2362d3Smrg return; 784de2362d3Smrg 785de2362d3Smrg memset(&tex_res, 0, sizeof(tex_resource_t)); 786de2362d3Smrg memset(&fs_conf, 0, sizeof(shader_config_t)); 787de2362d3Smrg 788de2362d3Smrg accel_state->XInited3D = TRUE; 789de2362d3Smrg 790de2362d3Smrg r600_start_3d(pScrn); 791de2362d3Smrg 792de2362d3Smrg // SQ 793de2362d3Smrg sq_conf.ps_prio = 0; 794de2362d3Smrg sq_conf.vs_prio = 1; 795de2362d3Smrg sq_conf.gs_prio = 2; 796de2362d3Smrg sq_conf.es_prio = 3; 797de2362d3Smrg // need to set stack/thread/gpr limits based on the asic 798de2362d3Smrg // for now just set them low enough so any card will work 799de2362d3Smrg // see r600_cp.c in the drm 800de2362d3Smrg switch (info->ChipFamily) { 801de2362d3Smrg case CHIP_FAMILY_R600: 802de2362d3Smrg sq_conf.num_ps_gprs = 192; 803de2362d3Smrg sq_conf.num_vs_gprs = 56; 804de2362d3Smrg sq_conf.num_temp_gprs = 4; 805de2362d3Smrg sq_conf.num_gs_gprs = 0; 806de2362d3Smrg sq_conf.num_es_gprs = 0; 807de2362d3Smrg sq_conf.num_ps_threads = 136; 808de2362d3Smrg sq_conf.num_vs_threads = 48; 809de2362d3Smrg sq_conf.num_gs_threads = 4; 810de2362d3Smrg sq_conf.num_es_threads = 4; 811de2362d3Smrg sq_conf.num_ps_stack_entries = 128; 812de2362d3Smrg sq_conf.num_vs_stack_entries = 128; 813de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 814de2362d3Smrg sq_conf.num_es_stack_entries = 0; 815de2362d3Smrg break; 816de2362d3Smrg case CHIP_FAMILY_RV630: 817de2362d3Smrg case CHIP_FAMILY_RV635: 818de2362d3Smrg sq_conf.num_ps_gprs = 84; 819de2362d3Smrg sq_conf.num_vs_gprs = 36; 820de2362d3Smrg sq_conf.num_temp_gprs = 4; 821de2362d3Smrg sq_conf.num_gs_gprs = 0; 822de2362d3Smrg sq_conf.num_es_gprs = 0; 823de2362d3Smrg sq_conf.num_ps_threads = 144; 824de2362d3Smrg sq_conf.num_vs_threads = 40; 825de2362d3Smrg sq_conf.num_gs_threads = 4; 826de2362d3Smrg sq_conf.num_es_threads = 4; 827de2362d3Smrg sq_conf.num_ps_stack_entries = 40; 828de2362d3Smrg sq_conf.num_vs_stack_entries = 40; 829de2362d3Smrg sq_conf.num_gs_stack_entries = 32; 830de2362d3Smrg sq_conf.num_es_stack_entries = 16; 831de2362d3Smrg break; 832de2362d3Smrg case CHIP_FAMILY_RV610: 833de2362d3Smrg case CHIP_FAMILY_RV620: 834de2362d3Smrg case CHIP_FAMILY_RS780: 835de2362d3Smrg case CHIP_FAMILY_RS880: 836de2362d3Smrg default: 837de2362d3Smrg sq_conf.num_ps_gprs = 84; 838de2362d3Smrg sq_conf.num_vs_gprs = 36; 839de2362d3Smrg sq_conf.num_temp_gprs = 4; 840de2362d3Smrg sq_conf.num_gs_gprs = 0; 841de2362d3Smrg sq_conf.num_es_gprs = 0; 842de2362d3Smrg sq_conf.num_ps_threads = 136; 843de2362d3Smrg sq_conf.num_vs_threads = 48; 844de2362d3Smrg sq_conf.num_gs_threads = 4; 845de2362d3Smrg sq_conf.num_es_threads = 4; 846de2362d3Smrg sq_conf.num_ps_stack_entries = 40; 847de2362d3Smrg sq_conf.num_vs_stack_entries = 40; 848de2362d3Smrg sq_conf.num_gs_stack_entries = 32; 849de2362d3Smrg sq_conf.num_es_stack_entries = 16; 850de2362d3Smrg break; 851de2362d3Smrg case CHIP_FAMILY_RV670: 852de2362d3Smrg sq_conf.num_ps_gprs = 144; 853de2362d3Smrg sq_conf.num_vs_gprs = 40; 854de2362d3Smrg sq_conf.num_temp_gprs = 4; 855de2362d3Smrg sq_conf.num_gs_gprs = 0; 856de2362d3Smrg sq_conf.num_es_gprs = 0; 857de2362d3Smrg sq_conf.num_ps_threads = 136; 858de2362d3Smrg sq_conf.num_vs_threads = 48; 859de2362d3Smrg sq_conf.num_gs_threads = 4; 860de2362d3Smrg sq_conf.num_es_threads = 4; 861de2362d3Smrg sq_conf.num_ps_stack_entries = 40; 862de2362d3Smrg sq_conf.num_vs_stack_entries = 40; 863de2362d3Smrg sq_conf.num_gs_stack_entries = 32; 864de2362d3Smrg sq_conf.num_es_stack_entries = 16; 865de2362d3Smrg break; 866de2362d3Smrg case CHIP_FAMILY_RV770: 867de2362d3Smrg sq_conf.num_ps_gprs = 192; 868de2362d3Smrg sq_conf.num_vs_gprs = 56; 869de2362d3Smrg sq_conf.num_temp_gprs = 4; 870de2362d3Smrg sq_conf.num_gs_gprs = 0; 871de2362d3Smrg sq_conf.num_es_gprs = 0; 872de2362d3Smrg sq_conf.num_ps_threads = 188; 873de2362d3Smrg sq_conf.num_vs_threads = 60; 874de2362d3Smrg sq_conf.num_gs_threads = 0; 875de2362d3Smrg sq_conf.num_es_threads = 0; 876de2362d3Smrg sq_conf.num_ps_stack_entries = 256; 877de2362d3Smrg sq_conf.num_vs_stack_entries = 256; 878de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 879de2362d3Smrg sq_conf.num_es_stack_entries = 0; 880de2362d3Smrg break; 881de2362d3Smrg case CHIP_FAMILY_RV730: 882de2362d3Smrg case CHIP_FAMILY_RV740: 883de2362d3Smrg sq_conf.num_ps_gprs = 84; 884de2362d3Smrg sq_conf.num_vs_gprs = 36; 885de2362d3Smrg sq_conf.num_temp_gprs = 4; 886de2362d3Smrg sq_conf.num_gs_gprs = 0; 887de2362d3Smrg sq_conf.num_es_gprs = 0; 888de2362d3Smrg sq_conf.num_ps_threads = 188; 889de2362d3Smrg sq_conf.num_vs_threads = 60; 890de2362d3Smrg sq_conf.num_gs_threads = 0; 891de2362d3Smrg sq_conf.num_es_threads = 0; 892de2362d3Smrg sq_conf.num_ps_stack_entries = 128; 893de2362d3Smrg sq_conf.num_vs_stack_entries = 128; 894de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 895de2362d3Smrg sq_conf.num_es_stack_entries = 0; 896de2362d3Smrg break; 897de2362d3Smrg case CHIP_FAMILY_RV710: 898de2362d3Smrg sq_conf.num_ps_gprs = 192; 899de2362d3Smrg sq_conf.num_vs_gprs = 56; 900de2362d3Smrg sq_conf.num_temp_gprs = 4; 901de2362d3Smrg sq_conf.num_gs_gprs = 0; 902de2362d3Smrg sq_conf.num_es_gprs = 0; 903de2362d3Smrg sq_conf.num_ps_threads = 144; 904de2362d3Smrg sq_conf.num_vs_threads = 48; 905de2362d3Smrg sq_conf.num_gs_threads = 0; 906de2362d3Smrg sq_conf.num_es_threads = 0; 907de2362d3Smrg sq_conf.num_ps_stack_entries = 128; 908de2362d3Smrg sq_conf.num_vs_stack_entries = 128; 909de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 910de2362d3Smrg sq_conf.num_es_stack_entries = 0; 911de2362d3Smrg break; 912de2362d3Smrg } 913de2362d3Smrg 914de2362d3Smrg r600_sq_setup(pScrn, &sq_conf); 915de2362d3Smrg 916de2362d3Smrg /* set fake reloc for unused depth */ 917de2362d3Smrg BEGIN_BATCH(3 + 2); 918de2362d3Smrg EREG(DB_DEPTH_INFO, 0); 919de2362d3Smrg RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 920de2362d3Smrg END_BATCH(); 921de2362d3Smrg 922de2362d3Smrg BEGIN_BATCH(80); 923de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) { 924de2362d3Smrg EREG(TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | 925de2362d3Smrg (28 << TD_FIFO_CREDIT_shift))); 926de2362d3Smrg EREG(VC_ENHANCE, 0); 927de2362d3Smrg EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 928de2362d3Smrg EREG(DB_DEBUG, 0x82000000); /* ? */ 929de2362d3Smrg EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 930de2362d3Smrg (16 << DEPTH_FLUSH_shift) | 931de2362d3Smrg (0 << FORCE_SUMMARIZE_shift) | 932de2362d3Smrg (4 << DEPTH_PENDING_FREE_shift) | 933de2362d3Smrg (16 << DEPTH_CACHELINE_FREE_shift) | 934de2362d3Smrg 0)); 935de2362d3Smrg } else { 936de2362d3Smrg EREG(TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) | 937de2362d3Smrg (28 << TD_FIFO_CREDIT_shift))); 938de2362d3Smrg EREG(VC_ENHANCE, 0); 939de2362d3Smrg EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit); 940de2362d3Smrg EREG(DB_DEBUG, 0); 941de2362d3Smrg EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 942de2362d3Smrg (16 << DEPTH_FLUSH_shift) | 943de2362d3Smrg (0 << FORCE_SUMMARIZE_shift) | 944de2362d3Smrg (4 << DEPTH_PENDING_FREE_shift) | 945de2362d3Smrg (4 << DEPTH_CACHELINE_FREE_shift) | 946de2362d3Smrg 0)); 947de2362d3Smrg } 948de2362d3Smrg 949de2362d3Smrg PACK0(SQ_VTX_BASE_VTX_LOC, 2); 950de2362d3Smrg E32(0); 951de2362d3Smrg E32(0); 952de2362d3Smrg 953de2362d3Smrg PACK0(SQ_ESGS_RING_ITEMSIZE, 9); 954de2362d3Smrg E32(0); // SQ_ESGS_RING_ITEMSIZE 955de2362d3Smrg E32(0); // SQ_GSVS_RING_ITEMSIZE 956de2362d3Smrg E32(0); // SQ_ESTMP_RING_ITEMSIZE 957de2362d3Smrg E32(0); // SQ_GSTMP_RING_ITEMSIZE 958de2362d3Smrg E32(0); // SQ_VSTMP_RING_ITEMSIZE 959de2362d3Smrg E32(0); // SQ_PSTMP_RING_ITEMSIZE 960de2362d3Smrg E32(0); // SQ_FBUF_RING_ITEMSIZE 961de2362d3Smrg E32(0); // SQ_REDUC_RING_ITEMSIZE 962de2362d3Smrg E32(0); // SQ_GS_VERT_ITEMSIZE 963de2362d3Smrg 964de2362d3Smrg // DB 965de2362d3Smrg EREG(DB_DEPTH_CONTROL, 0); 966de2362d3Smrg PACK0(DB_RENDER_CONTROL, 2); 967de2362d3Smrg E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); 968de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 969de2362d3Smrg E32(FORCE_SHADER_Z_ORDER_bit); 970de2362d3Smrg else 971de2362d3Smrg E32(0); 972de2362d3Smrg EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | 973de2362d3Smrg (2 << ALPHA_TO_MASK_OFFSET1_shift) | 974de2362d3Smrg (2 << ALPHA_TO_MASK_OFFSET2_shift) | 975de2362d3Smrg (2 << ALPHA_TO_MASK_OFFSET3_shift))); 976de2362d3Smrg EREG(DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ 977de2362d3Smrg DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 978de2362d3Smrg 979de2362d3Smrg PACK0(DB_STENCIL_CLEAR, 2); 980de2362d3Smrg E32(0); // DB_STENCIL_CLEAR 981de2362d3Smrg E32(0); // DB_DEPTH_CLEAR 982de2362d3Smrg 983de2362d3Smrg PACK0(DB_STENCILREFMASK, 3); 984de2362d3Smrg E32(0); // DB_STENCILREFMASK 985de2362d3Smrg E32(0); // DB_STENCILREFMASK_BF 986de2362d3Smrg E32(0); // SX_ALPHA_REF 987de2362d3Smrg 988de2362d3Smrg PACK0(CB_CLRCMP_CONTROL, 4); 989de2362d3Smrg E32(1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC 990de2362d3Smrg E32(0); // CB_CLRCMP_SRC 991de2362d3Smrg E32(0); // CB_CLRCMP_DST 992de2362d3Smrg E32(0); // CB_CLRCMP_MSK 993de2362d3Smrg 994de2362d3Smrg EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask); 995de2362d3Smrg EREG(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); 996de2362d3Smrg 997de2362d3Smrg PACK0(SX_ALPHA_TEST_CONTROL, 5); 998de2362d3Smrg E32(0); // SX_ALPHA_TEST_CONTROL 999de2362d3Smrg E32(0x00000000); // CB_BLEND_RED 1000de2362d3Smrg E32(0x00000000); // CB_BLEND_GREEN 1001de2362d3Smrg E32(0x00000000); // CB_BLEND_BLUE 1002de2362d3Smrg E32(0x00000000); // CB_BLEND_ALPHA 1003de2362d3Smrg 1004de2362d3Smrg EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | 1005de2362d3Smrg (0 << WINDOW_Y_OFFSET_shift))); 1006de2362d3Smrg 1007de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 1008de2362d3Smrg EREG(R7xx_PA_SC_EDGERULE, 0x00000000); 1009de2362d3Smrg else 1010de2362d3Smrg EREG(R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); 1011de2362d3Smrg 1012de2362d3Smrg EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); 1013de2362d3Smrg 1014de2362d3Smrg END_BATCH(); 1015de2362d3Smrg 1016de2362d3Smrg /* clip boolean is set to always visible -> doesn't matter */ 1017de2362d3Smrg for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) 1018de2362d3Smrg r600_set_clip_rect(pScrn, i, 0, 0, 8192, 8192); 1019de2362d3Smrg 1020de2362d3Smrg for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) 1021de2362d3Smrg r600_set_vport_scissor(pScrn, i, 0, 0, 8192, 8192); 1022de2362d3Smrg 1023de2362d3Smrg BEGIN_BATCH(49); 1024de2362d3Smrg PACK0(PA_SC_MPASS_PS_CNTL, 2); 1025de2362d3Smrg E32(0); 1026de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 1027de2362d3Smrg E32((WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); 1028de2362d3Smrg else 1029de2362d3Smrg E32((FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | 1030de2362d3Smrg 0x00500000)); /* ? */ 1031de2362d3Smrg 1032de2362d3Smrg PACK0(PA_SC_LINE_CNTL, 9); 1033de2362d3Smrg E32(0); // PA_SC_LINE_CNTL 1034de2362d3Smrg E32(0); // PA_SC_AA_CONFIG 1035de2362d3Smrg E32(((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL 1036de2362d3Smrg (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */ 1037de2362d3Smrg EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ 1038de2362d3Smrg EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ 1039de2362d3Smrg EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ 1040de2362d3Smrg EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ 1041de2362d3Smrg E32(0); // PA_SC_AA_SAMPLE_LOCS_MCTX 1042de2362d3Smrg E32(0); // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M 1043de2362d3Smrg 1044de2362d3Smrg EREG(PA_SC_AA_MASK, 0xFFFFFFFF); 1045de2362d3Smrg 1046de2362d3Smrg PACK0(PA_CL_CLIP_CNTL, 5); 1047de2362d3Smrg E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL 1048de2362d3Smrg E32(FACE_bit); // PA_SU_SC_MODE_CNTL 1049de2362d3Smrg E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL 1050de2362d3Smrg E32(0); // PA_CL_VS_OUT_CNTL 1051de2362d3Smrg E32(0); // PA_CL_NANINF_CNTL 1052de2362d3Smrg 1053de2362d3Smrg PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); 1054de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL 1055de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_CLAMP 1056de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_FRONT_SCALE 1057de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_FRONT_OFFSET 1058de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_BACK_SCALE 1059de2362d3Smrg E32(0); // PA_SU_POLY_OFFSET_BACK_OFFSET 1060de2362d3Smrg 1061de2362d3Smrg // SPI 1062de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 1063de2362d3Smrg EREG(R7xx_SPI_THREAD_GROUPING, 0); 1064de2362d3Smrg else 1065de2362d3Smrg EREG(R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift)); 1066de2362d3Smrg 1067de2362d3Smrg /* default Interpolator setup */ 1068de2362d3Smrg EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | 1069de2362d3Smrg (1 << SEMANTIC_1_shift))); 1070de2362d3Smrg PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2); 1071de2362d3Smrg /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ 1072de2362d3Smrg E32(((0 << SEMANTIC_shift) | 1073de2362d3Smrg (0x01 << DEFAULT_VAL_shift) | 1074de2362d3Smrg SEL_CENTROID_bit)); 1075de2362d3Smrg /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ 1076de2362d3Smrg E32(((1 << SEMANTIC_shift) | 1077de2362d3Smrg (0x01 << DEFAULT_VAL_shift) | 1078de2362d3Smrg SEL_CENTROID_bit)); 1079de2362d3Smrg 1080de2362d3Smrg PACK0(SPI_INPUT_Z, 4); 1081de2362d3Smrg E32(0); // SPI_INPUT_Z 1082de2362d3Smrg E32(0); // SPI_FOG_CNTL 1083de2362d3Smrg E32(0); // SPI_FOG_FUNC_SCALE 1084de2362d3Smrg E32(0); // SPI_FOG_FUNC_BIAS 1085de2362d3Smrg 1086de2362d3Smrg END_BATCH(); 1087de2362d3Smrg 1088de2362d3Smrg // clear FS 1089de2362d3Smrg fs_conf.bo = accel_state->shaders_bo; 1090de2362d3Smrg r600_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM); 1091de2362d3Smrg 1092de2362d3Smrg // VGT 1093de2362d3Smrg BEGIN_BATCH(46); 1094de2362d3Smrg PACK0(VGT_MAX_VTX_INDX, 4); 1095de2362d3Smrg E32(0xffffff); // VGT_MAX_VTX_INDX 1096de2362d3Smrg E32(0); // VGT_MIN_VTX_INDX 1097de2362d3Smrg E32(0); // VGT_INDX_OFFSET 1098de2362d3Smrg E32(0); // VGT_MULTI_PRIM_IB_RESET_INDX 1099de2362d3Smrg 1100de2362d3Smrg EREG(VGT_PRIMITIVEID_EN, 0); 1101de2362d3Smrg EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0); 1102de2362d3Smrg 1103de2362d3Smrg PACK0(VGT_INSTANCE_STEP_RATE_0, 2); 1104de2362d3Smrg E32(0); // VGT_INSTANCE_STEP_RATE_0 1105de2362d3Smrg E32(0); // VGT_INSTANCE_STEP_RATE_1 1106de2362d3Smrg 1107de2362d3Smrg PACK0(PA_SU_POINT_SIZE, 17); 1108de2362d3Smrg E32(0); // PA_SU_POINT_SIZE 1109de2362d3Smrg E32(0); // PA_SU_POINT_MINMAX 1110de2362d3Smrg E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL 1111de2362d3Smrg E32(0); // PA_SC_LINE_STIPPLE 1112de2362d3Smrg E32(0); // VGT_OUTPUT_PATH_CNTL 1113de2362d3Smrg E32(0); // VGT_HOS_CNTL 1114de2362d3Smrg E32(0); // VGT_HOS_MAX_TESS_LEVEL 1115de2362d3Smrg E32(0); // VGT_HOS_MIN_TESS_LEVEL 1116de2362d3Smrg E32(0); // VGT_HOS_REUSE_DEPTH 1117de2362d3Smrg E32(0); // VGT_GROUP_PRIM_TYPE 1118de2362d3Smrg E32(0); // VGT_GROUP_FIRST_DECR 1119de2362d3Smrg E32(0); // VGT_GROUP_DECR 1120de2362d3Smrg E32(0); // VGT_GROUP_VECT_0_CNTL 1121de2362d3Smrg E32(0); // VGT_GROUP_VECT_1_CNTL 1122de2362d3Smrg E32(0); // VGT_GROUP_VECT_0_FMT_CNTL 1123de2362d3Smrg E32(0); // VGT_GROUP_VECT_1_FMT_CNTL 1124de2362d3Smrg E32(0); // VGT_GS_MODE 1125de2362d3Smrg 1126de2362d3Smrg PACK0(VGT_STRMOUT_EN, 3); 1127de2362d3Smrg E32(0); // VGT_STRMOUT_EN 1128de2362d3Smrg E32(0); // VGT_REUSE_OFF 1129de2362d3Smrg E32(0); // VGT_VTX_CNT_EN 1130de2362d3Smrg 1131de2362d3Smrg EREG(VGT_STRMOUT_BUFFER_EN, 0); 1132de2362d3Smrg EREG(SX_MISC, 0); 1133de2362d3Smrg END_BATCH(); 1134de2362d3Smrg} 1135de2362d3Smrg 1136de2362d3Smrg 1137de2362d3Smrg/* 1138de2362d3Smrg * Commands 1139de2362d3Smrg */ 1140de2362d3Smrg 1141de2362d3Smrgvoid 1142de2362d3Smrgr600_draw_immd(ScrnInfoPtr pScrn, draw_config_t *draw_conf, uint32_t *indices) 1143de2362d3Smrg{ 1144de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1145de2362d3Smrg uint32_t i, count; 1146de2362d3Smrg 1147de2362d3Smrg // calculate num of packets 1148de2362d3Smrg count = 2; 1149de2362d3Smrg if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) 1150de2362d3Smrg count += (draw_conf->num_indices + 1) / 2; 1151de2362d3Smrg else 1152de2362d3Smrg count += draw_conf->num_indices; 1153de2362d3Smrg 1154de2362d3Smrg BEGIN_BATCH(8 + count); 1155de2362d3Smrg EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1156de2362d3Smrg PACK3(IT_INDEX_TYPE, 1); 1157de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 1158de2362d3Smrg E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1159de2362d3Smrg#else 1160de2362d3Smrg E32(draw_conf->index_type); 1161de2362d3Smrg#endif 1162de2362d3Smrg PACK3(IT_NUM_INSTANCES, 1); 1163de2362d3Smrg E32(draw_conf->num_instances); 1164de2362d3Smrg 1165de2362d3Smrg PACK3(IT_DRAW_INDEX_IMMD, count); 1166de2362d3Smrg E32(draw_conf->num_indices); 1167de2362d3Smrg E32(draw_conf->vgt_draw_initiator); 1168de2362d3Smrg 1169de2362d3Smrg if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) { 1170de2362d3Smrg for (i = 0; i < draw_conf->num_indices; i += 2) { 1171de2362d3Smrg if ((i + 1) == draw_conf->num_indices) 1172de2362d3Smrg E32(indices[i]); 1173de2362d3Smrg else 1174de2362d3Smrg E32((indices[i] | (indices[i + 1] << 16))); 1175de2362d3Smrg } 1176de2362d3Smrg } else { 1177de2362d3Smrg for (i = 0; i < draw_conf->num_indices; i++) 1178de2362d3Smrg E32(indices[i]); 1179de2362d3Smrg } 1180de2362d3Smrg END_BATCH(); 1181de2362d3Smrg} 1182de2362d3Smrg 1183de2362d3Smrgvoid 1184de2362d3Smrgr600_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf) 1185de2362d3Smrg{ 1186de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1187de2362d3Smrg 1188de2362d3Smrg BEGIN_BATCH(10); 1189de2362d3Smrg EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 1190de2362d3Smrg PACK3(IT_INDEX_TYPE, 1); 1191de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 1192de2362d3Smrg E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1193de2362d3Smrg#else 1194de2362d3Smrg E32(draw_conf->index_type); 1195de2362d3Smrg#endif 1196de2362d3Smrg PACK3(IT_NUM_INSTANCES, 1); 1197de2362d3Smrg E32(draw_conf->num_instances); 1198de2362d3Smrg PACK3(IT_DRAW_INDEX_AUTO, 2); 1199de2362d3Smrg E32(draw_conf->num_indices); 1200de2362d3Smrg E32(draw_conf->vgt_draw_initiator); 1201de2362d3Smrg END_BATCH(); 1202de2362d3Smrg} 1203de2362d3Smrg 1204de2362d3Smrgvoid r600_finish_op(ScrnInfoPtr pScrn, int vtx_size) 1205de2362d3Smrg{ 1206de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1207de2362d3Smrg struct radeon_accel_state *accel_state = info->accel_state; 1208de2362d3Smrg draw_config_t draw_conf; 1209de2362d3Smrg vtx_resource_t vtx_res; 1210de2362d3Smrg 1211de2362d3Smrg if (accel_state->vbo.vb_start_op == -1) 1212de2362d3Smrg return; 1213de2362d3Smrg 1214de2362d3Smrg CLEAR (draw_conf); 1215de2362d3Smrg CLEAR (vtx_res); 1216de2362d3Smrg 1217de2362d3Smrg if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) { 1218de2362d3Smrg R600IBDiscard(pScrn); 1219de2362d3Smrg return; 1220de2362d3Smrg } 1221de2362d3Smrg 1222de2362d3Smrg /* Vertex buffer setup */ 1223de2362d3Smrg accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op; 1224de2362d3Smrg vtx_res.id = SQ_VTX_RESOURCE_vs; 1225de2362d3Smrg vtx_res.vtx_size_dw = vtx_size / 4; 1226de2362d3Smrg vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; 1227de2362d3Smrg vtx_res.mem_req_size = 1; 1228de2362d3Smrg vtx_res.vb_addr = accel_state->vbo.vb_start_op; 1229de2362d3Smrg vtx_res.bo = accel_state->vbo.vb_bo; 1230de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 1231de2362d3Smrg vtx_res.endian = SQ_ENDIAN_8IN32; 1232de2362d3Smrg#endif 1233de2362d3Smrg r600_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT); 1234de2362d3Smrg 1235de2362d3Smrg /* Draw */ 1236de2362d3Smrg draw_conf.prim_type = DI_PT_RECTLIST; 1237de2362d3Smrg draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 1238de2362d3Smrg draw_conf.num_instances = 1; 1239de2362d3Smrg draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 1240de2362d3Smrg draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 1241de2362d3Smrg 1242de2362d3Smrg r600_draw_auto(pScrn, &draw_conf); 1243de2362d3Smrg 1244de2362d3Smrg /* XXX drm should handle this in fence submit */ 1245de2362d3Smrg r600_wait_3d_idle_clean(pScrn); 1246de2362d3Smrg 1247de2362d3Smrg /* sync dst surface */ 1248de2362d3Smrg r600_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 1249de2362d3Smrg accel_state->dst_size, 0, 1250de2362d3Smrg accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); 1251de2362d3Smrg 1252de2362d3Smrg accel_state->vbo.vb_start_op = -1; 1253de2362d3Smrg accel_state->ib_reset_op = 0; 1254de2362d3Smrg 1255de2362d3Smrg} 1256de2362d3Smrg 1257