1de2362d3Smrg/* 2de2362d3Smrg * Copyright 2008 Advanced Micro Devices, Inc. 3de2362d3Smrg * 4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5de2362d3Smrg * copy of this software and associated documentation files (the "Software"), 6de2362d3Smrg * to deal in the Software without restriction, including without limitation 7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the 9de2362d3Smrg * Software is furnished to do so, subject to the following conditions: 10de2362d3Smrg * 11de2362d3Smrg * The above copyright notice and this permission notice (including the next 12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the 13de2362d3Smrg * Software. 14de2362d3Smrg * 15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21de2362d3Smrg * SOFTWARE. 22de2362d3Smrg * 23de2362d3Smrg * Authors: Alex Deucher <alexander.deucher@amd.com> 24de2362d3Smrg * Matthias Hopf <mhopf@suse.de> 25de2362d3Smrg */ 26de2362d3Smrg#ifdef HAVE_CONFIG_H 27de2362d3Smrg#include "config.h" 28de2362d3Smrg#endif 29de2362d3Smrg 30de2362d3Smrg#include "xf86.h" 31de2362d3Smrg 32de2362d3Smrg#include <errno.h> 33de2362d3Smrg 34de2362d3Smrg#include "radeon.h" 35de2362d3Smrg#include "r600_shader.h" 36de2362d3Smrg#include "radeon_reg.h" 37de2362d3Smrg#include "r600_reg.h" 38de2362d3Smrg#include "r600_state.h" 39de2362d3Smrg 40de2362d3Smrg#include "radeon_vbo.h" 41de2362d3Smrg#include "radeon_exa_shared.h" 42de2362d3Smrg 43de2362d3Smrgstatic const uint32_t R600_ROP[16] = { 44de2362d3Smrg RADEON_ROP3_ZERO, /* GXclear */ 45de2362d3Smrg RADEON_ROP3_DSa, /* Gxand */ 46de2362d3Smrg RADEON_ROP3_SDna, /* GXandReverse */ 47de2362d3Smrg RADEON_ROP3_S, /* GXcopy */ 48de2362d3Smrg RADEON_ROP3_DSna, /* GXandInverted */ 49de2362d3Smrg RADEON_ROP3_D, /* GXnoop */ 50de2362d3Smrg RADEON_ROP3_DSx, /* GXxor */ 51de2362d3Smrg RADEON_ROP3_DSo, /* GXor */ 52de2362d3Smrg RADEON_ROP3_DSon, /* GXnor */ 53de2362d3Smrg RADEON_ROP3_DSxn, /* GXequiv */ 54de2362d3Smrg RADEON_ROP3_Dn, /* GXinvert */ 55de2362d3Smrg RADEON_ROP3_SDno, /* GXorReverse */ 56de2362d3Smrg RADEON_ROP3_Sn, /* GXcopyInverted */ 57de2362d3Smrg RADEON_ROP3_DSno, /* GXorInverted */ 58de2362d3Smrg RADEON_ROP3_DSan, /* GXnand */ 59de2362d3Smrg RADEON_ROP3_ONE, /* GXset */ 60de2362d3Smrg}; 61de2362d3Smrg 62de2362d3Smrg/* we try and batch operations together under KMS - 63de2362d3Smrg but it doesn't work yet without misrendering */ 64de2362d3Smrg#define KMS_MULTI_OP 1 65de2362d3Smrg 66de2362d3Smrg/* Flush the indirect buffer to the kernel for submission to the card */ 6718781e08Smrgvoid R600CPFlushIndirect(ScrnInfoPtr pScrn) 68de2362d3Smrg{ 6918781e08Smrg radeon_cs_flush_indirect(pScrn); 70de2362d3Smrg} 71de2362d3Smrg 7218781e08Smrgvoid R600IBDiscard(ScrnInfoPtr pScrn) 73de2362d3Smrg{ 7418781e08Smrg radeon_ib_discard(pScrn); 75de2362d3Smrg} 76de2362d3Smrg 77de2362d3Smrgvoid 7818781e08Smrgr600_wait_3d_idle_clean(ScrnInfoPtr pScrn) 79de2362d3Smrg{ 80de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 81de2362d3Smrg 82de2362d3Smrg //flush caches, don't generate timestamp 83de2362d3Smrg BEGIN_BATCH(5); 8418781e08Smrg PACK3(IT_EVENT_WRITE, 1); 8518781e08Smrg E32(CACHE_FLUSH_AND_INV_EVENT); 86de2362d3Smrg // wait for 3D idle clean 8718781e08Smrg EREG(WAIT_UNTIL, (WAIT_3D_IDLE_bit | 88de2362d3Smrg WAIT_3D_IDLECLEAN_bit)); 89de2362d3Smrg END_BATCH(); 90de2362d3Smrg} 91de2362d3Smrg 92de2362d3Smrgvoid 9318781e08Smrgr600_wait_3d_idle(ScrnInfoPtr pScrn) 94de2362d3Smrg{ 95de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 96de2362d3Smrg 97de2362d3Smrg BEGIN_BATCH(3); 9818781e08Smrg EREG(WAIT_UNTIL, WAIT_3D_IDLE_bit); 99de2362d3Smrg END_BATCH(); 100de2362d3Smrg} 101de2362d3Smrg 102de2362d3Smrgvoid 10318781e08Smrgr600_start_3d(ScrnInfoPtr pScrn) 104de2362d3Smrg{ 105de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 106de2362d3Smrg 107de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) { 108de2362d3Smrg BEGIN_BATCH(5); 10918781e08Smrg PACK3(IT_START_3D_CMDBUF, 1); 11018781e08Smrg E32(0); 111de2362d3Smrg } else 112de2362d3Smrg BEGIN_BATCH(3); 113de2362d3Smrg 11418781e08Smrg PACK3(IT_CONTEXT_CONTROL, 2); 11518781e08Smrg E32(0x80000000); 11618781e08Smrg E32(0x80000000); 117de2362d3Smrg END_BATCH(); 118de2362d3Smrg 119de2362d3Smrg} 120de2362d3Smrg 121de2362d3Smrg/* 122de2362d3Smrg * Setup of functional groups 123de2362d3Smrg */ 124de2362d3Smrg 125de2362d3Smrg// asic stack/thread/gpr limits - need to query the drm 126de2362d3Smrgstatic void 12718781e08Smrgr600_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf) 128de2362d3Smrg{ 129de2362d3Smrg uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 130de2362d3Smrg uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; 131de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 132de2362d3Smrg 133de2362d3Smrg if ((info->ChipFamily == CHIP_FAMILY_RV610) || 134de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV620) || 135de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS780) || 136de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS880) || 137de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV710)) 138de2362d3Smrg sq_config = 0; // no VC 139de2362d3Smrg else 140de2362d3Smrg sq_config = VC_ENABLE_bit; 141de2362d3Smrg 142de2362d3Smrg sq_config |= (DX9_CONSTS_bit | 143de2362d3Smrg ALU_INST_PREFER_VECTOR_bit | 144de2362d3Smrg (sq_conf->ps_prio << PS_PRIO_shift) | 145de2362d3Smrg (sq_conf->vs_prio << VS_PRIO_shift) | 146de2362d3Smrg (sq_conf->gs_prio << GS_PRIO_shift) | 147de2362d3Smrg (sq_conf->es_prio << ES_PRIO_shift)); 148de2362d3Smrg 149de2362d3Smrg sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | 150de2362d3Smrg (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | 151de2362d3Smrg (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); 152de2362d3Smrg sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | 153de2362d3Smrg (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); 154de2362d3Smrg 155de2362d3Smrg sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | 156de2362d3Smrg (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | 157de2362d3Smrg (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | 158de2362d3Smrg (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); 159de2362d3Smrg 160de2362d3Smrg sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | 161de2362d3Smrg (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); 162de2362d3Smrg 163de2362d3Smrg sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | 164de2362d3Smrg (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); 165de2362d3Smrg 166de2362d3Smrg BEGIN_BATCH(8); 16718781e08Smrg PACK0(SQ_CONFIG, 6); 16818781e08Smrg E32(sq_config); 16918781e08Smrg E32(sq_gpr_resource_mgmt_1); 17018781e08Smrg E32(sq_gpr_resource_mgmt_2); 17118781e08Smrg E32(sq_thread_resource_mgmt); 17218781e08Smrg E32(sq_stack_resource_mgmt_1); 17318781e08Smrg E32(sq_stack_resource_mgmt_2); 17418781e08Smrg END_BATCH(); 17518781e08Smrg} 17618781e08Smrg 17718781e08Smrgvoid r600_set_blend_color(ScrnInfoPtr pScrn, float *color) 17818781e08Smrg{ 17918781e08Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 18018781e08Smrg 18118781e08Smrg BEGIN_BATCH(2 + 4); 18218781e08Smrg PACK0(CB_BLEND_RED, 4); 18318781e08Smrg EFLOAT(color[0]); /* R */ 18418781e08Smrg EFLOAT(color[1]); /* G */ 18518781e08Smrg EFLOAT(color[2]); /* B */ 18618781e08Smrg EFLOAT(color[3]); /* A */ 1877314432eSmrg END_BATCH(); 1887314432eSmrg} 1897314432eSmrg 19018781e08Smrg 191de2362d3Smrgvoid 19218781e08Smrgr600_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain) 193de2362d3Smrg{ 194de2362d3Smrg uint32_t cb_color_info, cb_color_control; 195de2362d3Smrg unsigned pitch, slice, h, array_mode; 196de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 197de2362d3Smrg 198de2362d3Smrg 199de2362d3Smrg if (cb_conf->surface) { 200de2362d3Smrg switch (cb_conf->surface->level[0].mode) { 201de2362d3Smrg case RADEON_SURF_MODE_1D: 202de2362d3Smrg array_mode = 2; 203de2362d3Smrg break; 204de2362d3Smrg case RADEON_SURF_MODE_2D: 205de2362d3Smrg array_mode = 4; 206de2362d3Smrg break; 207de2362d3Smrg default: 208de2362d3Smrg array_mode = 0; 209de2362d3Smrg break; 210de2362d3Smrg } 211de2362d3Smrg pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1; 212de2362d3Smrg slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1; 213de2362d3Smrg } else 214de2362d3Smrg { 215de2362d3Smrg array_mode = cb_conf->array_mode; 216de2362d3Smrg pitch = (cb_conf->w / 8) - 1; 217de2362d3Smrg h = RADEON_ALIGN(cb_conf->h, 8); 218de2362d3Smrg slice = ((cb_conf->w * h) / 64) - 1; 219de2362d3Smrg } 220de2362d3Smrg 221de2362d3Smrg cb_color_info = ((cb_conf->endian << ENDIAN_shift) | 222de2362d3Smrg (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | 223de2362d3Smrg (array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | 224de2362d3Smrg (cb_conf->number_type << NUMBER_TYPE_shift) | 225de2362d3Smrg (cb_conf->comp_swap << COMP_SWAP_shift) | 226de2362d3Smrg (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); 227de2362d3Smrg if (cb_conf->read_size) 228de2362d3Smrg cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; 229de2362d3Smrg if (cb_conf->blend_clamp) 230de2362d3Smrg cb_color_info |= BLEND_CLAMP_bit; 231de2362d3Smrg if (cb_conf->clear_color) 232de2362d3Smrg cb_color_info |= CLEAR_COLOR_bit; 233de2362d3Smrg if (cb_conf->blend_bypass) 234de2362d3Smrg cb_color_info |= BLEND_BYPASS_bit; 235de2362d3Smrg if (cb_conf->blend_float32) 236de2362d3Smrg cb_color_info |= BLEND_FLOAT32_bit; 237de2362d3Smrg if (cb_conf->simple_float) 238de2362d3Smrg cb_color_info |= SIMPLE_FLOAT_bit; 239de2362d3Smrg if (cb_conf->round_mode) 240de2362d3Smrg cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; 241de2362d3Smrg if (cb_conf->tile_compact) 242de2362d3Smrg cb_color_info |= TILE_COMPACT_bit; 243de2362d3Smrg if (cb_conf->source_format) 244de2362d3Smrg cb_color_info |= SOURCE_FORMAT_bit; 245de2362d3Smrg 246de2362d3Smrg BEGIN_BATCH(3 + 2); 24718781e08Smrg EREG((CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); 248de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 249de2362d3Smrg END_BATCH(); 250de2362d3Smrg 251de2362d3Smrg // rv6xx workaround 252de2362d3Smrg if ((info->ChipFamily > CHIP_FAMILY_R600) && 253de2362d3Smrg (info->ChipFamily < CHIP_FAMILY_RV770)) { 254de2362d3Smrg BEGIN_BATCH(2); 25518781e08Smrg PACK3(IT_SURFACE_BASE_UPDATE, 1); 25618781e08Smrg E32((2 << cb_conf->id)); 257de2362d3Smrg END_BATCH(); 258de2362d3Smrg } 259de2362d3Smrg /* Set CMASK & TILE buffer to the offset of color buffer as 260de2362d3Smrg * we don't use those this shouldn't cause any issue and we 261de2362d3Smrg * then have a valid cmd stream 262de2362d3Smrg */ 263de2362d3Smrg BEGIN_BATCH(3 + 2); 26418781e08Smrg EREG((CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 265de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 266de2362d3Smrg END_BATCH(); 267de2362d3Smrg BEGIN_BATCH(3 + 2); 26818781e08Smrg EREG((CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 269de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 270de2362d3Smrg END_BATCH(); 271de2362d3Smrg BEGIN_BATCH(9); 272de2362d3Smrg // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib 27318781e08Smrg EREG((CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | 274de2362d3Smrg (slice << SLICE_TILE_MAX_shift))); 27518781e08Smrg EREG((CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | 276de2362d3Smrg (0 << SLICE_MAX_shift))); 27718781e08Smrg EREG((CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | 278de2362d3Smrg (0 << FMASK_TILE_MAX_shift))); 279de2362d3Smrg END_BATCH(); 280de2362d3Smrg 281de2362d3Smrg BEGIN_BATCH(3 + 2); 28218781e08Smrg EREG((CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); 283de2362d3Smrg RELOC_BATCH(cb_conf->bo, 0, domain); 284de2362d3Smrg END_BATCH(); 285de2362d3Smrg 286de2362d3Smrg BEGIN_BATCH(9); 28718781e08Smrg EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift)); 288de2362d3Smrg cb_color_control = R600_ROP[cb_conf->rop] | 289de2362d3Smrg (cb_conf->blend_enable << TARGET_BLEND_ENABLE_shift); 290de2362d3Smrg if (info->ChipFamily == CHIP_FAMILY_R600) { 291de2362d3Smrg /* no per-MRT blend on R600 */ 29218781e08Smrg EREG(CB_COLOR_CONTROL, cb_color_control); 29318781e08Smrg EREG(CB_BLEND_CONTROL, cb_conf->blendcntl); 294de2362d3Smrg } else { 295de2362d3Smrg if (cb_conf->blend_enable) 296de2362d3Smrg cb_color_control |= PER_MRT_BLEND_bit; 29718781e08Smrg EREG(CB_COLOR_CONTROL, cb_color_control); 29818781e08Smrg EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); 299de2362d3Smrg } 300de2362d3Smrg END_BATCH(); 301de2362d3Smrg} 302de2362d3Smrg 303de2362d3Smrgstatic void 30418781e08Smrgr600_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type, 305de2362d3Smrg uint32_t size, uint64_t mc_addr, 306de2362d3Smrg struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) 307de2362d3Smrg{ 308de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 309de2362d3Smrg uint32_t cp_coher_size; 310de2362d3Smrg if (size == 0xffffffff) 311de2362d3Smrg cp_coher_size = 0xffffffff; 312de2362d3Smrg else 313de2362d3Smrg cp_coher_size = ((size + 255) >> 8); 314de2362d3Smrg 315de2362d3Smrg BEGIN_BATCH(5 + 2); 31618781e08Smrg PACK3(IT_SURFACE_SYNC, 4); 31718781e08Smrg E32(sync_type); 31818781e08Smrg E32(cp_coher_size); 31918781e08Smrg E32((mc_addr >> 8)); 32018781e08Smrg E32(10); /* poll interval */ 321de2362d3Smrg RELOC_BATCH(bo, rdomains, wdomain); 322de2362d3Smrg END_BATCH(); 323de2362d3Smrg} 324de2362d3Smrg 325de2362d3Smrg/* inserts a wait for vline in the command stream */ 326de2362d3Smrgvoid 32718781e08Smrgr600_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, 328de2362d3Smrg xf86CrtcPtr crtc, int start, int stop) 329de2362d3Smrg{ 330de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 33118781e08Smrg drmmode_crtc_private_ptr drmmode_crtc; 332de2362d3Smrg 333de2362d3Smrg if (!crtc) 334de2362d3Smrg return; 335de2362d3Smrg 336de2362d3Smrg if (!crtc->enabled) 337de2362d3Smrg return; 338de2362d3Smrg 33918781e08Smrg if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) 34018781e08Smrg return; 341de2362d3Smrg 342de2362d3Smrg start = max(start, crtc->y); 343de2362d3Smrg stop = min(stop, crtc->y + crtc->mode.VDisplay); 344de2362d3Smrg 345de2362d3Smrg if (start >= stop) 346de2362d3Smrg return; 347de2362d3Smrg 34818781e08Smrg drmmode_crtc = crtc->driver_private; 34918781e08Smrg 35018781e08Smrg BEGIN_BATCH(11); 35118781e08Smrg /* set the VLINE range */ 35218781e08Smrg EREG(AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */ 35318781e08Smrg (start << AVIVO_D1MODE_VLINE_START_SHIFT) | 35418781e08Smrg (stop << AVIVO_D1MODE_VLINE_END_SHIFT)); 35518781e08Smrg 35618781e08Smrg /* tell the CP to poll the VLINE state register */ 35718781e08Smrg PACK3(IT_WAIT_REG_MEM, 6); 35818781e08Smrg E32(IT_WAIT_REG | IT_WAIT_EQ); 35918781e08Smrg E32(IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS)); 36018781e08Smrg E32(0); 36118781e08Smrg E32(0); // Ref value 36218781e08Smrg E32(AVIVO_D1MODE_VLINE_STAT); // Mask 36318781e08Smrg E32(10); // Wait interval 36418781e08Smrg /* add crtc reloc */ 36518781e08Smrg PACK3(IT_NOP, 1); 36618781e08Smrg E32(drmmode_crtc->mode_crtc->crtc_id); 36718781e08Smrg END_BATCH(); 368de2362d3Smrg} 369de2362d3Smrg 370de2362d3Smrgvoid 37118781e08Smrgr600_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp) 372de2362d3Smrg{ 373de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 374de2362d3Smrg 375de2362d3Smrg BEGIN_BATCH(8); 376de2362d3Smrg /* Interpolator setup */ 37718781e08Smrg EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift)); 37818781e08Smrg PACK0(SPI_PS_IN_CONTROL_0, 3); 37918781e08Smrg E32((num_interp << NUM_INTERP_shift)); 38018781e08Smrg E32(0); 38118781e08Smrg E32(0); 382de2362d3Smrg END_BATCH(); 383de2362d3Smrg} 384de2362d3Smrg 385de2362d3Smrgvoid 38618781e08Smrgr600_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain) 387de2362d3Smrg{ 388de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 389de2362d3Smrg uint32_t sq_pgm_resources; 390de2362d3Smrg 391de2362d3Smrg sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | 392de2362d3Smrg (fs_conf->stack_size << STACK_SIZE_shift)); 393de2362d3Smrg 394de2362d3Smrg if (fs_conf->dx10_clamp) 395de2362d3Smrg sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; 396de2362d3Smrg 397de2362d3Smrg BEGIN_BATCH(3 + 2); 39818781e08Smrg EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8); 399de2362d3Smrg RELOC_BATCH(fs_conf->bo, domain, 0); 400de2362d3Smrg END_BATCH(); 401de2362d3Smrg 402de2362d3Smrg BEGIN_BATCH(6); 40318781e08Smrg EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources); 40418781e08Smrg EREG(SQ_PGM_CF_OFFSET_FS, 0); 405de2362d3Smrg END_BATCH(); 406de2362d3Smrg} 407de2362d3Smrg 408de2362d3Smrgvoid 40918781e08Smrgr600_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain) 410de2362d3Smrg{ 411de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 412de2362d3Smrg uint32_t sq_pgm_resources; 413de2362d3Smrg 414de2362d3Smrg sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | 415de2362d3Smrg (vs_conf->stack_size << STACK_SIZE_shift)); 416de2362d3Smrg 417de2362d3Smrg if (vs_conf->dx10_clamp) 418de2362d3Smrg sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; 419de2362d3Smrg if (vs_conf->fetch_cache_lines) 420de2362d3Smrg sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 421de2362d3Smrg if (vs_conf->uncached_first_inst) 422de2362d3Smrg sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 423de2362d3Smrg 424de2362d3Smrg /* flush SQ cache */ 42518781e08Smrg r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 426de2362d3Smrg vs_conf->shader_size, vs_conf->shader_addr, 427de2362d3Smrg vs_conf->bo, domain, 0); 428de2362d3Smrg 429de2362d3Smrg BEGIN_BATCH(3 + 2); 43018781e08Smrg EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8); 431de2362d3Smrg RELOC_BATCH(vs_conf->bo, domain, 0); 432de2362d3Smrg END_BATCH(); 433de2362d3Smrg 434de2362d3Smrg BEGIN_BATCH(6); 43518781e08Smrg EREG(SQ_PGM_RESOURCES_VS, sq_pgm_resources); 43618781e08Smrg EREG(SQ_PGM_CF_OFFSET_VS, 0); 437de2362d3Smrg END_BATCH(); 438de2362d3Smrg} 439de2362d3Smrg 440de2362d3Smrgvoid 44118781e08Smrgr600_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain) 442de2362d3Smrg{ 443de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 444de2362d3Smrg uint32_t sq_pgm_resources; 445de2362d3Smrg 446de2362d3Smrg sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | 447de2362d3Smrg (ps_conf->stack_size << STACK_SIZE_shift)); 448de2362d3Smrg 449de2362d3Smrg if (ps_conf->dx10_clamp) 450de2362d3Smrg sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; 451de2362d3Smrg if (ps_conf->fetch_cache_lines) 452de2362d3Smrg sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); 453de2362d3Smrg if (ps_conf->uncached_first_inst) 454de2362d3Smrg sq_pgm_resources |= UNCACHED_FIRST_INST_bit; 455de2362d3Smrg if (ps_conf->clamp_consts) 456de2362d3Smrg sq_pgm_resources |= CLAMP_CONSTS_bit; 457de2362d3Smrg 458de2362d3Smrg /* flush SQ cache */ 45918781e08Smrg r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, 460de2362d3Smrg ps_conf->shader_size, ps_conf->shader_addr, 461de2362d3Smrg ps_conf->bo, domain, 0); 462de2362d3Smrg 463de2362d3Smrg BEGIN_BATCH(3 + 2); 46418781e08Smrg EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8); 465de2362d3Smrg RELOC_BATCH(ps_conf->bo, domain, 0); 466de2362d3Smrg END_BATCH(); 467de2362d3Smrg 468de2362d3Smrg BEGIN_BATCH(9); 46918781e08Smrg EREG(SQ_PGM_RESOURCES_PS, sq_pgm_resources); 47018781e08Smrg EREG(SQ_PGM_EXPORTS_PS, ps_conf->export_mode); 47118781e08Smrg EREG(SQ_PGM_CF_OFFSET_PS, 0); 472de2362d3Smrg END_BATCH(); 473de2362d3Smrg} 474de2362d3Smrg 475de2362d3Smrgvoid 47618781e08Smrgr600_set_alu_consts(ScrnInfoPtr pScrn, int offset, int count, float *const_buf) 477de2362d3Smrg{ 478de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 479de2362d3Smrg int i; 480de2362d3Smrg const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); 481de2362d3Smrg 482de2362d3Smrg BEGIN_BATCH(2 + countreg); 48318781e08Smrg PACK0(SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); 484de2362d3Smrg for (i = 0; i < countreg; i++) 48518781e08Smrg EFLOAT(const_buf[i]); 486de2362d3Smrg END_BATCH(); 487de2362d3Smrg} 488de2362d3Smrg 489de2362d3Smrgvoid 49018781e08Smrgr600_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val) 491de2362d3Smrg{ 492de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 493de2362d3Smrg /* bool register order is: ps, vs, gs; one register each 494de2362d3Smrg * 1 bits per bool; 32 bools each for ps, vs, gs. 495de2362d3Smrg */ 496de2362d3Smrg BEGIN_BATCH(3); 49718781e08Smrg EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); 498de2362d3Smrg END_BATCH(); 499de2362d3Smrg} 500de2362d3Smrg 501de2362d3Smrgstatic void 50218781e08Smrgr600_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain) 503de2362d3Smrg{ 504de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 505de2362d3Smrg struct radeon_accel_state *accel_state = info->accel_state; 506de2362d3Smrg uint32_t sq_vtx_constant_word2; 507de2362d3Smrg 508de2362d3Smrg sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | 509de2362d3Smrg ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | 510de2362d3Smrg (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | 511de2362d3Smrg (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | 512de2362d3Smrg (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); 513de2362d3Smrg if (res->clamp_x) 514de2362d3Smrg sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; 515de2362d3Smrg 516de2362d3Smrg if (res->format_comp_all) 517de2362d3Smrg sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; 518de2362d3Smrg 519de2362d3Smrg if (res->srf_mode_all) 520de2362d3Smrg sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; 521de2362d3Smrg 522de2362d3Smrg /* flush vertex cache */ 523de2362d3Smrg if ((info->ChipFamily == CHIP_FAMILY_RV610) || 524de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV620) || 525de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS780) || 526de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RS880) || 527de2362d3Smrg (info->ChipFamily == CHIP_FAMILY_RV710)) 52818781e08Smrg r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 52918781e08Smrg accel_state->vbo.vb_offset, 0, 530de2362d3Smrg res->bo, 531de2362d3Smrg domain, 0); 532de2362d3Smrg else 53318781e08Smrg r600_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit, 53418781e08Smrg accel_state->vbo.vb_offset, 0, 535de2362d3Smrg res->bo, 536de2362d3Smrg domain, 0); 537de2362d3Smrg 538de2362d3Smrg BEGIN_BATCH(9 + 2); 53918781e08Smrg PACK0(SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); 54018781e08Smrg E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS 54118781e08Smrg E32((res->vtx_num_entries << 2) - 1); // 1: SIZE 54218781e08Smrg E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN 54318781e08Smrg E32(res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? 54418781e08Smrg E32(0); // 4: n/a 54518781e08Smrg E32(0); // 5: n/a 54618781e08Smrg E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE 547de2362d3Smrg RELOC_BATCH(res->bo, domain, 0); 548de2362d3Smrg END_BATCH(); 549de2362d3Smrg} 550de2362d3Smrg 551de2362d3Smrgvoid 55218781e08Smrgr600_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain) 553de2362d3Smrg{ 554de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 555de2362d3Smrg uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 556de2362d3Smrg uint32_t sq_tex_resource_word5, sq_tex_resource_word6; 557de2362d3Smrg uint32_t array_mode, pitch; 558de2362d3Smrg 559de2362d3Smrg if (tex_res->surface) { 560de2362d3Smrg switch (tex_res->surface->level[0].mode) { 561de2362d3Smrg case RADEON_SURF_MODE_1D: 562de2362d3Smrg array_mode = 2; 563de2362d3Smrg break; 564de2362d3Smrg case RADEON_SURF_MODE_2D: 565de2362d3Smrg array_mode = 4; 566de2362d3Smrg break; 567de2362d3Smrg default: 568de2362d3Smrg array_mode = 0; 569de2362d3Smrg break; 570de2362d3Smrg } 571de2362d3Smrg pitch = tex_res->surface->level[0].nblk_x >> 3; 572de2362d3Smrg } else 573de2362d3Smrg { 574de2362d3Smrg array_mode = tex_res->tile_mode; 575de2362d3Smrg pitch = (tex_res->pitch + 7) >> 3; 576de2362d3Smrg } 577de2362d3Smrg 578de2362d3Smrg sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | 579de2362d3Smrg (array_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); 580de2362d3Smrg 581de2362d3Smrg if (tex_res->w) 582de2362d3Smrg sq_tex_resource_word0 |= (((pitch - 1) << PITCH_shift) | 583de2362d3Smrg ((tex_res->w - 1) << TEX_WIDTH_shift)); 584de2362d3Smrg 585de2362d3Smrg if (tex_res->tile_type) 586de2362d3Smrg sq_tex_resource_word0 |= TILE_TYPE_bit; 587de2362d3Smrg 588de2362d3Smrg sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); 589de2362d3Smrg 590de2362d3Smrg if (tex_res->h) 591de2362d3Smrg sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); 592de2362d3Smrg if (tex_res->depth) 593de2362d3Smrg sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); 594de2362d3Smrg 595de2362d3Smrg sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | 596de2362d3Smrg (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | 597de2362d3Smrg (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | 598de2362d3Smrg (tex_res->format_comp_w << FORMAT_COMP_W_shift) | 599de2362d3Smrg (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | 600de2362d3Smrg (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | 601de2362d3Smrg (tex_res->request_size << REQUEST_SIZE_shift) | 602de2362d3Smrg (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | 603de2362d3Smrg (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | 604de2362d3Smrg (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | 605de2362d3Smrg (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | 606de2362d3Smrg (tex_res->base_level << BASE_LEVEL_shift)); 607de2362d3Smrg 608de2362d3Smrg if (tex_res->srf_mode_all) 609de2362d3Smrg sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; 610de2362d3Smrg if (tex_res->force_degamma) 611de2362d3Smrg sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; 612de2362d3Smrg 613de2362d3Smrg sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | 614de2362d3Smrg (tex_res->base_array << BASE_ARRAY_shift) | 615de2362d3Smrg (tex_res->last_array << LAST_ARRAY_shift)); 616de2362d3Smrg 617de2362d3Smrg sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | 618de2362d3Smrg (tex_res->perf_modulation << PERF_MODULATION_shift) | 619de2362d3Smrg (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); 620de2362d3Smrg 621de2362d3Smrg if (tex_res->interlaced) 622de2362d3Smrg sq_tex_resource_word6 |= INTERLACED_bit; 623de2362d3Smrg 624de2362d3Smrg /* flush texture cache */ 62518781e08Smrg r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, 626de2362d3Smrg tex_res->size, tex_res->base, 627de2362d3Smrg tex_res->bo, domain, 0); 628de2362d3Smrg 629de2362d3Smrg BEGIN_BATCH(9 + 4); 63018781e08Smrg PACK0(SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); 63118781e08Smrg E32(sq_tex_resource_word0); 63218781e08Smrg E32(sq_tex_resource_word1); 63318781e08Smrg E32(((tex_res->base) >> 8)); 63418781e08Smrg E32(((tex_res->mip_base) >> 8)); 63518781e08Smrg E32(sq_tex_resource_word4); 63618781e08Smrg E32(sq_tex_resource_word5); 63718781e08Smrg E32(sq_tex_resource_word6); 638de2362d3Smrg RELOC_BATCH(tex_res->bo, domain, 0); 639de2362d3Smrg RELOC_BATCH(tex_res->mip_bo, domain, 0); 640de2362d3Smrg END_BATCH(); 641de2362d3Smrg} 642de2362d3Smrg 643de2362d3Smrgvoid 64418781e08Smrgr600_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s) 645de2362d3Smrg{ 646de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 647de2362d3Smrg uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; 648de2362d3Smrg 649de2362d3Smrg sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | 650de2362d3Smrg (s->clamp_y << CLAMP_Y_shift) | 651de2362d3Smrg (s->clamp_z << CLAMP_Z_shift) | 652de2362d3Smrg (s->xy_mag_filter << XY_MAG_FILTER_shift) | 653de2362d3Smrg (s->xy_min_filter << XY_MIN_FILTER_shift) | 654de2362d3Smrg (s->z_filter << Z_FILTER_shift) | 655de2362d3Smrg (s->mip_filter << MIP_FILTER_shift) | 656de2362d3Smrg (s->border_color << BORDER_COLOR_TYPE_shift) | 657de2362d3Smrg (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | 658de2362d3Smrg (s->chroma_key << CHROMA_KEY_shift)); 659de2362d3Smrg if (s->point_sampling_clamp) 660de2362d3Smrg sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; 661de2362d3Smrg if (s->tex_array_override) 662de2362d3Smrg sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; 663de2362d3Smrg if (s->lod_uses_minor_axis) 664de2362d3Smrg sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; 665de2362d3Smrg 666de2362d3Smrg sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | 667de2362d3Smrg (s->max_lod << MAX_LOD_shift) | 668de2362d3Smrg (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); 669de2362d3Smrg 670de2362d3Smrg sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | 671de2362d3Smrg (s->perf_mip << PERF_MIP_shift) | 672de2362d3Smrg (s->perf_z << PERF_Z_shift)); 673de2362d3Smrg if (s->mc_coord_truncate) 674de2362d3Smrg sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; 675de2362d3Smrg if (s->force_degamma) 676de2362d3Smrg sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; 677de2362d3Smrg if (s->high_precision_filter) 678de2362d3Smrg sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; 679de2362d3Smrg if (s->fetch_4) 680de2362d3Smrg sq_tex_sampler_word2 |= FETCH_4_bit; 681de2362d3Smrg if (s->sample_is_pcf) 682de2362d3Smrg sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; 683de2362d3Smrg if (s->type) 684de2362d3Smrg sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; 685de2362d3Smrg 686de2362d3Smrg BEGIN_BATCH(5); 68718781e08Smrg PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); 68818781e08Smrg E32(sq_tex_sampler_word0); 68918781e08Smrg E32(sq_tex_sampler_word1); 69018781e08Smrg E32(sq_tex_sampler_word2); 691de2362d3Smrg END_BATCH(); 692de2362d3Smrg} 693de2362d3Smrg 694de2362d3Smrg//XXX deal with clip offsets in clip setup 695de2362d3Smrgvoid 69618781e08Smrgr600_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 697de2362d3Smrg{ 698de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 699de2362d3Smrg 700de2362d3Smrg BEGIN_BATCH(4); 70118781e08Smrg PACK0(PA_SC_SCREEN_SCISSOR_TL, 2); 70218781e08Smrg E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | 703de2362d3Smrg (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); 70418781e08Smrg E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | 705de2362d3Smrg (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); 706de2362d3Smrg END_BATCH(); 707de2362d3Smrg} 708de2362d3Smrg 709de2362d3Smrgvoid 71018781e08Smrgr600_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 711de2362d3Smrg{ 712de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 713de2362d3Smrg 714de2362d3Smrg BEGIN_BATCH(4); 71518781e08Smrg PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); 71618781e08Smrg E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | 717de2362d3Smrg (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | 718de2362d3Smrg WINDOW_OFFSET_DISABLE_bit)); 71918781e08Smrg E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | 720de2362d3Smrg (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); 721de2362d3Smrg END_BATCH(); 722de2362d3Smrg} 723de2362d3Smrg 724de2362d3Smrgvoid 72518781e08Smrgr600_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 726de2362d3Smrg{ 727de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 728de2362d3Smrg 729de2362d3Smrg BEGIN_BATCH(4); 73018781e08Smrg PACK0(PA_SC_GENERIC_SCISSOR_TL, 2); 73118781e08Smrg E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | 732de2362d3Smrg (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | 733de2362d3Smrg WINDOW_OFFSET_DISABLE_bit)); 73418781e08Smrg E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | 735de2362d3Smrg (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); 736de2362d3Smrg END_BATCH(); 737de2362d3Smrg} 738de2362d3Smrg 739de2362d3Smrgvoid 74018781e08Smrgr600_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 741de2362d3Smrg{ 742de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 743de2362d3Smrg 744de2362d3Smrg BEGIN_BATCH(4); 74518781e08Smrg PACK0(PA_SC_WINDOW_SCISSOR_TL, 2); 74618781e08Smrg E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 747de2362d3Smrg (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | 748de2362d3Smrg WINDOW_OFFSET_DISABLE_bit)); 74918781e08Smrg E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | 750de2362d3Smrg (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); 751de2362d3Smrg END_BATCH(); 752de2362d3Smrg} 753de2362d3Smrg 754de2362d3Smrgvoid 75518781e08Smrgr600_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) 756de2362d3Smrg{ 757de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 758de2362d3Smrg 759de2362d3Smrg BEGIN_BATCH(4); 76018781e08Smrg PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2); 76118781e08Smrg E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | 762de2362d3Smrg (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); 76318781e08Smrg E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | 764de2362d3Smrg (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); 765de2362d3Smrg END_BATCH(); 766de2362d3Smrg} 767de2362d3Smrg 768de2362d3Smrg/* 769de2362d3Smrg * Setup of default state 770de2362d3Smrg */ 771de2362d3Smrg 772de2362d3Smrgvoid 77318781e08Smrgr600_set_default_state(ScrnInfoPtr pScrn) 774de2362d3Smrg{ 775de2362d3Smrg tex_resource_t tex_res; 776de2362d3Smrg shader_config_t fs_conf; 777de2362d3Smrg sq_config_t sq_conf; 778de2362d3Smrg int i; 779de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 780de2362d3Smrg struct radeon_accel_state *accel_state = info->accel_state; 781de2362d3Smrg 782de2362d3Smrg if (accel_state->XInited3D) 783de2362d3Smrg return; 784de2362d3Smrg 785de2362d3Smrg memset(&tex_res, 0, sizeof(tex_resource_t)); 786de2362d3Smrg memset(&fs_conf, 0, sizeof(shader_config_t)); 787de2362d3Smrg 788de2362d3Smrg accel_state->XInited3D = TRUE; 789de2362d3Smrg 79018781e08Smrg r600_start_3d(pScrn); 791de2362d3Smrg 792de2362d3Smrg // SQ 793de2362d3Smrg sq_conf.ps_prio = 0; 794de2362d3Smrg sq_conf.vs_prio = 1; 795de2362d3Smrg sq_conf.gs_prio = 2; 796de2362d3Smrg sq_conf.es_prio = 3; 797de2362d3Smrg // need to set stack/thread/gpr limits based on the asic 798de2362d3Smrg // for now just set them low enough so any card will work 799de2362d3Smrg // see r600_cp.c in the drm 800de2362d3Smrg switch (info->ChipFamily) { 801de2362d3Smrg case CHIP_FAMILY_R600: 802de2362d3Smrg sq_conf.num_ps_gprs = 192; 803de2362d3Smrg sq_conf.num_vs_gprs = 56; 804de2362d3Smrg sq_conf.num_temp_gprs = 4; 805de2362d3Smrg sq_conf.num_gs_gprs = 0; 806de2362d3Smrg sq_conf.num_es_gprs = 0; 807de2362d3Smrg sq_conf.num_ps_threads = 136; 808de2362d3Smrg sq_conf.num_vs_threads = 48; 809de2362d3Smrg sq_conf.num_gs_threads = 4; 810de2362d3Smrg sq_conf.num_es_threads = 4; 811de2362d3Smrg sq_conf.num_ps_stack_entries = 128; 812de2362d3Smrg sq_conf.num_vs_stack_entries = 128; 813de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 814de2362d3Smrg sq_conf.num_es_stack_entries = 0; 815de2362d3Smrg break; 816de2362d3Smrg case CHIP_FAMILY_RV630: 817de2362d3Smrg case CHIP_FAMILY_RV635: 818de2362d3Smrg sq_conf.num_ps_gprs = 84; 819de2362d3Smrg sq_conf.num_vs_gprs = 36; 820de2362d3Smrg sq_conf.num_temp_gprs = 4; 821de2362d3Smrg sq_conf.num_gs_gprs = 0; 822de2362d3Smrg sq_conf.num_es_gprs = 0; 823de2362d3Smrg sq_conf.num_ps_threads = 144; 824de2362d3Smrg sq_conf.num_vs_threads = 40; 825de2362d3Smrg sq_conf.num_gs_threads = 4; 826de2362d3Smrg sq_conf.num_es_threads = 4; 827de2362d3Smrg sq_conf.num_ps_stack_entries = 40; 828de2362d3Smrg sq_conf.num_vs_stack_entries = 40; 829de2362d3Smrg sq_conf.num_gs_stack_entries = 32; 830de2362d3Smrg sq_conf.num_es_stack_entries = 16; 831de2362d3Smrg break; 832de2362d3Smrg case CHIP_FAMILY_RV610: 833de2362d3Smrg case CHIP_FAMILY_RV620: 834de2362d3Smrg case CHIP_FAMILY_RS780: 835de2362d3Smrg case CHIP_FAMILY_RS880: 836de2362d3Smrg default: 837de2362d3Smrg sq_conf.num_ps_gprs = 84; 838de2362d3Smrg sq_conf.num_vs_gprs = 36; 839de2362d3Smrg sq_conf.num_temp_gprs = 4; 840de2362d3Smrg sq_conf.num_gs_gprs = 0; 841de2362d3Smrg sq_conf.num_es_gprs = 0; 842de2362d3Smrg sq_conf.num_ps_threads = 136; 843de2362d3Smrg sq_conf.num_vs_threads = 48; 844de2362d3Smrg sq_conf.num_gs_threads = 4; 845de2362d3Smrg sq_conf.num_es_threads = 4; 846de2362d3Smrg sq_conf.num_ps_stack_entries = 40; 847de2362d3Smrg sq_conf.num_vs_stack_entries = 40; 848de2362d3Smrg sq_conf.num_gs_stack_entries = 32; 849de2362d3Smrg sq_conf.num_es_stack_entries = 16; 850de2362d3Smrg break; 851de2362d3Smrg case CHIP_FAMILY_RV670: 852de2362d3Smrg sq_conf.num_ps_gprs = 144; 853de2362d3Smrg sq_conf.num_vs_gprs = 40; 854de2362d3Smrg sq_conf.num_temp_gprs = 4; 855de2362d3Smrg sq_conf.num_gs_gprs = 0; 856de2362d3Smrg sq_conf.num_es_gprs = 0; 857de2362d3Smrg sq_conf.num_ps_threads = 136; 858de2362d3Smrg sq_conf.num_vs_threads = 48; 859de2362d3Smrg sq_conf.num_gs_threads = 4; 860de2362d3Smrg sq_conf.num_es_threads = 4; 861de2362d3Smrg sq_conf.num_ps_stack_entries = 40; 862de2362d3Smrg sq_conf.num_vs_stack_entries = 40; 863de2362d3Smrg sq_conf.num_gs_stack_entries = 32; 864de2362d3Smrg sq_conf.num_es_stack_entries = 16; 865de2362d3Smrg break; 866de2362d3Smrg case CHIP_FAMILY_RV770: 867de2362d3Smrg sq_conf.num_ps_gprs = 192; 868de2362d3Smrg sq_conf.num_vs_gprs = 56; 869de2362d3Smrg sq_conf.num_temp_gprs = 4; 870de2362d3Smrg sq_conf.num_gs_gprs = 0; 871de2362d3Smrg sq_conf.num_es_gprs = 0; 872de2362d3Smrg sq_conf.num_ps_threads = 188; 873de2362d3Smrg sq_conf.num_vs_threads = 60; 874de2362d3Smrg sq_conf.num_gs_threads = 0; 875de2362d3Smrg sq_conf.num_es_threads = 0; 876de2362d3Smrg sq_conf.num_ps_stack_entries = 256; 877de2362d3Smrg sq_conf.num_vs_stack_entries = 256; 878de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 879de2362d3Smrg sq_conf.num_es_stack_entries = 0; 880de2362d3Smrg break; 881de2362d3Smrg case CHIP_FAMILY_RV730: 882de2362d3Smrg case CHIP_FAMILY_RV740: 883de2362d3Smrg sq_conf.num_ps_gprs = 84; 884de2362d3Smrg sq_conf.num_vs_gprs = 36; 885de2362d3Smrg sq_conf.num_temp_gprs = 4; 886de2362d3Smrg sq_conf.num_gs_gprs = 0; 887de2362d3Smrg sq_conf.num_es_gprs = 0; 888de2362d3Smrg sq_conf.num_ps_threads = 188; 889de2362d3Smrg sq_conf.num_vs_threads = 60; 890de2362d3Smrg sq_conf.num_gs_threads = 0; 891de2362d3Smrg sq_conf.num_es_threads = 0; 892de2362d3Smrg sq_conf.num_ps_stack_entries = 128; 893de2362d3Smrg sq_conf.num_vs_stack_entries = 128; 894de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 895de2362d3Smrg sq_conf.num_es_stack_entries = 0; 896de2362d3Smrg break; 897de2362d3Smrg case CHIP_FAMILY_RV710: 898de2362d3Smrg sq_conf.num_ps_gprs = 192; 899de2362d3Smrg sq_conf.num_vs_gprs = 56; 900de2362d3Smrg sq_conf.num_temp_gprs = 4; 901de2362d3Smrg sq_conf.num_gs_gprs = 0; 902de2362d3Smrg sq_conf.num_es_gprs = 0; 903de2362d3Smrg sq_conf.num_ps_threads = 144; 904de2362d3Smrg sq_conf.num_vs_threads = 48; 905de2362d3Smrg sq_conf.num_gs_threads = 0; 906de2362d3Smrg sq_conf.num_es_threads = 0; 907de2362d3Smrg sq_conf.num_ps_stack_entries = 128; 908de2362d3Smrg sq_conf.num_vs_stack_entries = 128; 909de2362d3Smrg sq_conf.num_gs_stack_entries = 0; 910de2362d3Smrg sq_conf.num_es_stack_entries = 0; 911de2362d3Smrg break; 912de2362d3Smrg } 913de2362d3Smrg 91418781e08Smrg r600_sq_setup(pScrn, &sq_conf); 915de2362d3Smrg 916de2362d3Smrg /* set fake reloc for unused depth */ 917de2362d3Smrg BEGIN_BATCH(3 + 2); 91818781e08Smrg EREG(DB_DEPTH_INFO, 0); 919de2362d3Smrg RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 920de2362d3Smrg END_BATCH(); 921de2362d3Smrg 922de2362d3Smrg BEGIN_BATCH(80); 923de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) { 92418781e08Smrg EREG(TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | 925de2362d3Smrg (28 << TD_FIFO_CREDIT_shift))); 92618781e08Smrg EREG(VC_ENHANCE, 0); 92718781e08Smrg EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); 92818781e08Smrg EREG(DB_DEBUG, 0x82000000); /* ? */ 92918781e08Smrg EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 930de2362d3Smrg (16 << DEPTH_FLUSH_shift) | 931de2362d3Smrg (0 << FORCE_SUMMARIZE_shift) | 932de2362d3Smrg (4 << DEPTH_PENDING_FREE_shift) | 933de2362d3Smrg (16 << DEPTH_CACHELINE_FREE_shift) | 934de2362d3Smrg 0)); 935de2362d3Smrg } else { 93618781e08Smrg EREG(TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) | 937de2362d3Smrg (28 << TD_FIFO_CREDIT_shift))); 93818781e08Smrg EREG(VC_ENHANCE, 0); 93918781e08Smrg EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit); 94018781e08Smrg EREG(DB_DEBUG, 0); 94118781e08Smrg EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | 942de2362d3Smrg (16 << DEPTH_FLUSH_shift) | 943de2362d3Smrg (0 << FORCE_SUMMARIZE_shift) | 944de2362d3Smrg (4 << DEPTH_PENDING_FREE_shift) | 945de2362d3Smrg (4 << DEPTH_CACHELINE_FREE_shift) | 946de2362d3Smrg 0)); 947de2362d3Smrg } 948de2362d3Smrg 94918781e08Smrg PACK0(SQ_VTX_BASE_VTX_LOC, 2); 95018781e08Smrg E32(0); 95118781e08Smrg E32(0); 95218781e08Smrg 95318781e08Smrg PACK0(SQ_ESGS_RING_ITEMSIZE, 9); 95418781e08Smrg E32(0); // SQ_ESGS_RING_ITEMSIZE 95518781e08Smrg E32(0); // SQ_GSVS_RING_ITEMSIZE 95618781e08Smrg E32(0); // SQ_ESTMP_RING_ITEMSIZE 95718781e08Smrg E32(0); // SQ_GSTMP_RING_ITEMSIZE 95818781e08Smrg E32(0); // SQ_VSTMP_RING_ITEMSIZE 95918781e08Smrg E32(0); // SQ_PSTMP_RING_ITEMSIZE 96018781e08Smrg E32(0); // SQ_FBUF_RING_ITEMSIZE 96118781e08Smrg E32(0); // SQ_REDUC_RING_ITEMSIZE 96218781e08Smrg E32(0); // SQ_GS_VERT_ITEMSIZE 963de2362d3Smrg 964de2362d3Smrg // DB 96518781e08Smrg EREG(DB_DEPTH_CONTROL, 0); 96618781e08Smrg PACK0(DB_RENDER_CONTROL, 2); 96718781e08Smrg E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); 968de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 96918781e08Smrg E32(FORCE_SHADER_Z_ORDER_bit); 970de2362d3Smrg else 97118781e08Smrg E32(0); 97218781e08Smrg EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | 973de2362d3Smrg (2 << ALPHA_TO_MASK_OFFSET1_shift) | 974de2362d3Smrg (2 << ALPHA_TO_MASK_OFFSET2_shift) | 975de2362d3Smrg (2 << ALPHA_TO_MASK_OFFSET3_shift))); 97618781e08Smrg EREG(DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ 977de2362d3Smrg DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 978de2362d3Smrg 97918781e08Smrg PACK0(DB_STENCIL_CLEAR, 2); 98018781e08Smrg E32(0); // DB_STENCIL_CLEAR 98118781e08Smrg E32(0); // DB_DEPTH_CLEAR 982de2362d3Smrg 98318781e08Smrg PACK0(DB_STENCILREFMASK, 3); 98418781e08Smrg E32(0); // DB_STENCILREFMASK 98518781e08Smrg E32(0); // DB_STENCILREFMASK_BF 98618781e08Smrg E32(0); // SX_ALPHA_REF 987de2362d3Smrg 98818781e08Smrg PACK0(CB_CLRCMP_CONTROL, 4); 98918781e08Smrg E32(1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC 99018781e08Smrg E32(0); // CB_CLRCMP_SRC 99118781e08Smrg E32(0); // CB_CLRCMP_DST 99218781e08Smrg E32(0); // CB_CLRCMP_MSK 993de2362d3Smrg 99418781e08Smrg EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask); 99518781e08Smrg EREG(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); 996de2362d3Smrg 99718781e08Smrg PACK0(SX_ALPHA_TEST_CONTROL, 5); 99818781e08Smrg E32(0); // SX_ALPHA_TEST_CONTROL 99918781e08Smrg E32(0x00000000); // CB_BLEND_RED 100018781e08Smrg E32(0x00000000); // CB_BLEND_GREEN 100118781e08Smrg E32(0x00000000); // CB_BLEND_BLUE 100218781e08Smrg E32(0x00000000); // CB_BLEND_ALPHA 1003de2362d3Smrg 100418781e08Smrg EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | 1005de2362d3Smrg (0 << WINDOW_Y_OFFSET_shift))); 1006de2362d3Smrg 1007de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 100818781e08Smrg EREG(R7xx_PA_SC_EDGERULE, 0x00000000); 1009de2362d3Smrg else 101018781e08Smrg EREG(R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); 1011de2362d3Smrg 101218781e08Smrg EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); 1013de2362d3Smrg 1014de2362d3Smrg END_BATCH(); 1015de2362d3Smrg 1016de2362d3Smrg /* clip boolean is set to always visible -> doesn't matter */ 1017de2362d3Smrg for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) 101818781e08Smrg r600_set_clip_rect(pScrn, i, 0, 0, 8192, 8192); 1019de2362d3Smrg 1020de2362d3Smrg for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) 102118781e08Smrg r600_set_vport_scissor(pScrn, i, 0, 0, 8192, 8192); 1022de2362d3Smrg 1023de2362d3Smrg BEGIN_BATCH(49); 102418781e08Smrg PACK0(PA_SC_MPASS_PS_CNTL, 2); 102518781e08Smrg E32(0); 1026de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 102718781e08Smrg E32((WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); 1028de2362d3Smrg else 102918781e08Smrg E32((FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | 1030de2362d3Smrg 0x00500000)); /* ? */ 1031de2362d3Smrg 103218781e08Smrg PACK0(PA_SC_LINE_CNTL, 9); 103318781e08Smrg E32(0); // PA_SC_LINE_CNTL 103418781e08Smrg E32(0); // PA_SC_AA_CONFIG 103518781e08Smrg E32(((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL 1036de2362d3Smrg (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */ 103718781e08Smrg EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ 103818781e08Smrg EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ 103918781e08Smrg EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ 104018781e08Smrg EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ 104118781e08Smrg E32(0); // PA_SC_AA_SAMPLE_LOCS_MCTX 104218781e08Smrg E32(0); // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M 104318781e08Smrg 104418781e08Smrg EREG(PA_SC_AA_MASK, 0xFFFFFFFF); 104518781e08Smrg 104618781e08Smrg PACK0(PA_CL_CLIP_CNTL, 5); 104718781e08Smrg E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL 104818781e08Smrg E32(FACE_bit); // PA_SU_SC_MODE_CNTL 104918781e08Smrg E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL 105018781e08Smrg E32(0); // PA_CL_VS_OUT_CNTL 105118781e08Smrg E32(0); // PA_CL_NANINF_CNTL 105218781e08Smrg 105318781e08Smrg PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); 105418781e08Smrg E32(0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL 105518781e08Smrg E32(0); // PA_SU_POLY_OFFSET_CLAMP 105618781e08Smrg E32(0); // PA_SU_POLY_OFFSET_FRONT_SCALE 105718781e08Smrg E32(0); // PA_SU_POLY_OFFSET_FRONT_OFFSET 105818781e08Smrg E32(0); // PA_SU_POLY_OFFSET_BACK_SCALE 105918781e08Smrg E32(0); // PA_SU_POLY_OFFSET_BACK_OFFSET 1060de2362d3Smrg 1061de2362d3Smrg // SPI 1062de2362d3Smrg if (info->ChipFamily < CHIP_FAMILY_RV770) 106318781e08Smrg EREG(R7xx_SPI_THREAD_GROUPING, 0); 1064de2362d3Smrg else 106518781e08Smrg EREG(R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift)); 1066de2362d3Smrg 1067de2362d3Smrg /* default Interpolator setup */ 106818781e08Smrg EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | 1069de2362d3Smrg (1 << SEMANTIC_1_shift))); 107018781e08Smrg PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2); 1071de2362d3Smrg /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ 107218781e08Smrg E32(((0 << SEMANTIC_shift) | 1073de2362d3Smrg (0x01 << DEFAULT_VAL_shift) | 1074de2362d3Smrg SEL_CENTROID_bit)); 1075de2362d3Smrg /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ 107618781e08Smrg E32(((1 << SEMANTIC_shift) | 1077de2362d3Smrg (0x01 << DEFAULT_VAL_shift) | 1078de2362d3Smrg SEL_CENTROID_bit)); 1079de2362d3Smrg 108018781e08Smrg PACK0(SPI_INPUT_Z, 4); 108118781e08Smrg E32(0); // SPI_INPUT_Z 108218781e08Smrg E32(0); // SPI_FOG_CNTL 108318781e08Smrg E32(0); // SPI_FOG_FUNC_SCALE 108418781e08Smrg E32(0); // SPI_FOG_FUNC_BIAS 1085de2362d3Smrg 1086de2362d3Smrg END_BATCH(); 1087de2362d3Smrg 1088de2362d3Smrg // clear FS 1089de2362d3Smrg fs_conf.bo = accel_state->shaders_bo; 109018781e08Smrg r600_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM); 1091de2362d3Smrg 1092de2362d3Smrg // VGT 1093de2362d3Smrg BEGIN_BATCH(46); 109418781e08Smrg PACK0(VGT_MAX_VTX_INDX, 4); 109518781e08Smrg E32(0xffffff); // VGT_MAX_VTX_INDX 109618781e08Smrg E32(0); // VGT_MIN_VTX_INDX 109718781e08Smrg E32(0); // VGT_INDX_OFFSET 109818781e08Smrg E32(0); // VGT_MULTI_PRIM_IB_RESET_INDX 109918781e08Smrg 110018781e08Smrg EREG(VGT_PRIMITIVEID_EN, 0); 110118781e08Smrg EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0); 110218781e08Smrg 110318781e08Smrg PACK0(VGT_INSTANCE_STEP_RATE_0, 2); 110418781e08Smrg E32(0); // VGT_INSTANCE_STEP_RATE_0 110518781e08Smrg E32(0); // VGT_INSTANCE_STEP_RATE_1 110618781e08Smrg 110718781e08Smrg PACK0(PA_SU_POINT_SIZE, 17); 110818781e08Smrg E32(0); // PA_SU_POINT_SIZE 110918781e08Smrg E32(0); // PA_SU_POINT_MINMAX 111018781e08Smrg E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL 111118781e08Smrg E32(0); // PA_SC_LINE_STIPPLE 111218781e08Smrg E32(0); // VGT_OUTPUT_PATH_CNTL 111318781e08Smrg E32(0); // VGT_HOS_CNTL 111418781e08Smrg E32(0); // VGT_HOS_MAX_TESS_LEVEL 111518781e08Smrg E32(0); // VGT_HOS_MIN_TESS_LEVEL 111618781e08Smrg E32(0); // VGT_HOS_REUSE_DEPTH 111718781e08Smrg E32(0); // VGT_GROUP_PRIM_TYPE 111818781e08Smrg E32(0); // VGT_GROUP_FIRST_DECR 111918781e08Smrg E32(0); // VGT_GROUP_DECR 112018781e08Smrg E32(0); // VGT_GROUP_VECT_0_CNTL 112118781e08Smrg E32(0); // VGT_GROUP_VECT_1_CNTL 112218781e08Smrg E32(0); // VGT_GROUP_VECT_0_FMT_CNTL 112318781e08Smrg E32(0); // VGT_GROUP_VECT_1_FMT_CNTL 112418781e08Smrg E32(0); // VGT_GS_MODE 112518781e08Smrg 112618781e08Smrg PACK0(VGT_STRMOUT_EN, 3); 112718781e08Smrg E32(0); // VGT_STRMOUT_EN 112818781e08Smrg E32(0); // VGT_REUSE_OFF 112918781e08Smrg E32(0); // VGT_VTX_CNT_EN 113018781e08Smrg 113118781e08Smrg EREG(VGT_STRMOUT_BUFFER_EN, 0); 113218781e08Smrg EREG(SX_MISC, 0); 1133de2362d3Smrg END_BATCH(); 1134de2362d3Smrg} 1135de2362d3Smrg 1136de2362d3Smrg 1137de2362d3Smrg/* 1138de2362d3Smrg * Commands 1139de2362d3Smrg */ 1140de2362d3Smrg 1141de2362d3Smrgvoid 114218781e08Smrgr600_draw_immd(ScrnInfoPtr pScrn, draw_config_t *draw_conf, uint32_t *indices) 1143de2362d3Smrg{ 1144de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1145de2362d3Smrg uint32_t i, count; 1146de2362d3Smrg 1147de2362d3Smrg // calculate num of packets 1148de2362d3Smrg count = 2; 1149de2362d3Smrg if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) 1150de2362d3Smrg count += (draw_conf->num_indices + 1) / 2; 1151de2362d3Smrg else 1152de2362d3Smrg count += draw_conf->num_indices; 1153de2362d3Smrg 1154de2362d3Smrg BEGIN_BATCH(8 + count); 115518781e08Smrg EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 115618781e08Smrg PACK3(IT_INDEX_TYPE, 1); 1157de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 115818781e08Smrg E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1159de2362d3Smrg#else 116018781e08Smrg E32(draw_conf->index_type); 1161de2362d3Smrg#endif 116218781e08Smrg PACK3(IT_NUM_INSTANCES, 1); 116318781e08Smrg E32(draw_conf->num_instances); 1164de2362d3Smrg 116518781e08Smrg PACK3(IT_DRAW_INDEX_IMMD, count); 116618781e08Smrg E32(draw_conf->num_indices); 116718781e08Smrg E32(draw_conf->vgt_draw_initiator); 1168de2362d3Smrg 1169de2362d3Smrg if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) { 1170de2362d3Smrg for (i = 0; i < draw_conf->num_indices; i += 2) { 1171de2362d3Smrg if ((i + 1) == draw_conf->num_indices) 117218781e08Smrg E32(indices[i]); 1173de2362d3Smrg else 117418781e08Smrg E32((indices[i] | (indices[i + 1] << 16))); 1175de2362d3Smrg } 1176de2362d3Smrg } else { 1177de2362d3Smrg for (i = 0; i < draw_conf->num_indices; i++) 117818781e08Smrg E32(indices[i]); 1179de2362d3Smrg } 1180de2362d3Smrg END_BATCH(); 1181de2362d3Smrg} 1182de2362d3Smrg 1183de2362d3Smrgvoid 118418781e08Smrgr600_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf) 1185de2362d3Smrg{ 1186de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1187de2362d3Smrg 1188de2362d3Smrg BEGIN_BATCH(10); 118918781e08Smrg EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); 119018781e08Smrg PACK3(IT_INDEX_TYPE, 1); 1191de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 119218781e08Smrg E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); 1193de2362d3Smrg#else 119418781e08Smrg E32(draw_conf->index_type); 1195de2362d3Smrg#endif 119618781e08Smrg PACK3(IT_NUM_INSTANCES, 1); 119718781e08Smrg E32(draw_conf->num_instances); 119818781e08Smrg PACK3(IT_DRAW_INDEX_AUTO, 2); 119918781e08Smrg E32(draw_conf->num_indices); 120018781e08Smrg E32(draw_conf->vgt_draw_initiator); 1201de2362d3Smrg END_BATCH(); 1202de2362d3Smrg} 1203de2362d3Smrg 1204de2362d3Smrgvoid r600_finish_op(ScrnInfoPtr pScrn, int vtx_size) 1205de2362d3Smrg{ 1206de2362d3Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1207de2362d3Smrg struct radeon_accel_state *accel_state = info->accel_state; 1208de2362d3Smrg draw_config_t draw_conf; 1209de2362d3Smrg vtx_resource_t vtx_res; 1210de2362d3Smrg 1211de2362d3Smrg if (accel_state->vbo.vb_start_op == -1) 1212de2362d3Smrg return; 1213de2362d3Smrg 1214de2362d3Smrg CLEAR (draw_conf); 1215de2362d3Smrg CLEAR (vtx_res); 1216de2362d3Smrg 1217de2362d3Smrg if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) { 121818781e08Smrg R600IBDiscard(pScrn); 1219de2362d3Smrg return; 1220de2362d3Smrg } 1221de2362d3Smrg 1222de2362d3Smrg /* Vertex buffer setup */ 1223de2362d3Smrg accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op; 1224de2362d3Smrg vtx_res.id = SQ_VTX_RESOURCE_vs; 1225de2362d3Smrg vtx_res.vtx_size_dw = vtx_size / 4; 1226de2362d3Smrg vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; 1227de2362d3Smrg vtx_res.mem_req_size = 1; 122818781e08Smrg vtx_res.vb_addr = accel_state->vbo.vb_start_op; 1229de2362d3Smrg vtx_res.bo = accel_state->vbo.vb_bo; 1230de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 1231de2362d3Smrg vtx_res.endian = SQ_ENDIAN_8IN32; 1232de2362d3Smrg#endif 123318781e08Smrg r600_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT); 1234de2362d3Smrg 1235de2362d3Smrg /* Draw */ 1236de2362d3Smrg draw_conf.prim_type = DI_PT_RECTLIST; 1237de2362d3Smrg draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 1238de2362d3Smrg draw_conf.num_instances = 1; 1239de2362d3Smrg draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 1240de2362d3Smrg draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 1241de2362d3Smrg 124218781e08Smrg r600_draw_auto(pScrn, &draw_conf); 1243de2362d3Smrg 1244de2362d3Smrg /* XXX drm should handle this in fence submit */ 124518781e08Smrg r600_wait_3d_idle_clean(pScrn); 1246de2362d3Smrg 1247de2362d3Smrg /* sync dst surface */ 124818781e08Smrg r600_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 124918781e08Smrg accel_state->dst_size, 0, 1250de2362d3Smrg accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); 1251de2362d3Smrg 1252de2362d3Smrg accel_state->vbo.vb_start_op = -1; 1253de2362d3Smrg accel_state->ib_reset_op = 0; 1254de2362d3Smrg 1255de2362d3Smrg} 1256de2362d3Smrg 1257