r6xx_accel.c revision de2362d3
1de2362d3Smrg/*
2de2362d3Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3de2362d3Smrg *
4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5de2362d3Smrg * copy of this software and associated documentation files (the "Software"),
6de2362d3Smrg * to deal in the Software without restriction, including without limitation
7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the
9de2362d3Smrg * Software is furnished to do so, subject to the following conditions:
10de2362d3Smrg *
11de2362d3Smrg * The above copyright notice and this permission notice (including the next
12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the
13de2362d3Smrg * Software.
14de2362d3Smrg *
15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21de2362d3Smrg * SOFTWARE.
22de2362d3Smrg *
23de2362d3Smrg * Authors: Alex Deucher <alexander.deucher@amd.com>
24de2362d3Smrg *          Matthias Hopf <mhopf@suse.de>
25de2362d3Smrg */
26de2362d3Smrg#ifdef HAVE_CONFIG_H
27de2362d3Smrg#include "config.h"
28de2362d3Smrg#endif
29de2362d3Smrg
30de2362d3Smrg#include "xf86.h"
31de2362d3Smrg
32de2362d3Smrg#include <errno.h>
33de2362d3Smrg
34de2362d3Smrg#include "radeon.h"
35de2362d3Smrg#include "r600_shader.h"
36de2362d3Smrg#include "radeon_reg.h"
37de2362d3Smrg#include "r600_reg.h"
38de2362d3Smrg#include "r600_state.h"
39de2362d3Smrg
40de2362d3Smrg#include "radeon_vbo.h"
41de2362d3Smrg#include "radeon_exa_shared.h"
42de2362d3Smrg
43de2362d3Smrgstatic const uint32_t R600_ROP[16] = {
44de2362d3Smrg    RADEON_ROP3_ZERO, /* GXclear        */
45de2362d3Smrg    RADEON_ROP3_DSa,  /* Gxand          */
46de2362d3Smrg    RADEON_ROP3_SDna, /* GXandReverse   */
47de2362d3Smrg    RADEON_ROP3_S,    /* GXcopy         */
48de2362d3Smrg    RADEON_ROP3_DSna, /* GXandInverted  */
49de2362d3Smrg    RADEON_ROP3_D,    /* GXnoop         */
50de2362d3Smrg    RADEON_ROP3_DSx,  /* GXxor          */
51de2362d3Smrg    RADEON_ROP3_DSo,  /* GXor           */
52de2362d3Smrg    RADEON_ROP3_DSon, /* GXnor          */
53de2362d3Smrg    RADEON_ROP3_DSxn, /* GXequiv        */
54de2362d3Smrg    RADEON_ROP3_Dn,   /* GXinvert       */
55de2362d3Smrg    RADEON_ROP3_SDno, /* GXorReverse    */
56de2362d3Smrg    RADEON_ROP3_Sn,   /* GXcopyInverted */
57de2362d3Smrg    RADEON_ROP3_DSno, /* GXorInverted   */
58de2362d3Smrg    RADEON_ROP3_DSan, /* GXnand         */
59de2362d3Smrg    RADEON_ROP3_ONE,  /* GXset          */
60de2362d3Smrg};
61de2362d3Smrg
/* We try to batch operations together under KMS,
   but it doesn't work yet without misrendering. */
64de2362d3Smrg#define KMS_MULTI_OP 1
65de2362d3Smrg
66de2362d3Smrg/* Flush the indirect buffer to the kernel for submission to the card */
67de2362d3Smrgvoid R600CPFlushIndirect(ScrnInfoPtr pScrn)
68de2362d3Smrg{
69de2362d3Smrg    radeon_cs_flush_indirect(pScrn);
70de2362d3Smrg}
71de2362d3Smrg
72de2362d3Smrgvoid R600IBDiscard(ScrnInfoPtr pScrn)
73de2362d3Smrg{
74de2362d3Smrg    radeon_ib_discard(pScrn);
75de2362d3Smrg}
76de2362d3Smrg
77de2362d3Smrgvoid
78de2362d3Smrgr600_wait_3d_idle_clean(ScrnInfoPtr pScrn)
79de2362d3Smrg{
80de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
81de2362d3Smrg
82de2362d3Smrg    //flush caches, don't generate timestamp
83de2362d3Smrg    BEGIN_BATCH(5);
84de2362d3Smrg    PACK3(IT_EVENT_WRITE, 1);
85de2362d3Smrg    E32(CACHE_FLUSH_AND_INV_EVENT);
86de2362d3Smrg    // wait for 3D idle clean
87de2362d3Smrg    EREG(WAIT_UNTIL,                          (WAIT_3D_IDLE_bit |
88de2362d3Smrg						   WAIT_3D_IDLECLEAN_bit));
89de2362d3Smrg    END_BATCH();
90de2362d3Smrg}
91de2362d3Smrg
92de2362d3Smrgvoid
93de2362d3Smrgr600_wait_3d_idle(ScrnInfoPtr pScrn)
94de2362d3Smrg{
95de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
96de2362d3Smrg
97de2362d3Smrg    BEGIN_BATCH(3);
98de2362d3Smrg    EREG(WAIT_UNTIL,                          WAIT_3D_IDLE_bit);
99de2362d3Smrg    END_BATCH();
100de2362d3Smrg}
101de2362d3Smrg
102de2362d3Smrgvoid
103de2362d3Smrgr600_start_3d(ScrnInfoPtr pScrn)
104de2362d3Smrg{
105de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
106de2362d3Smrg
107de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770) {
108de2362d3Smrg	BEGIN_BATCH(5);
109de2362d3Smrg	PACK3(IT_START_3D_CMDBUF, 1);
110de2362d3Smrg	E32(0);
111de2362d3Smrg    } else
112de2362d3Smrg	BEGIN_BATCH(3);
113de2362d3Smrg
114de2362d3Smrg    PACK3(IT_CONTEXT_CONTROL, 2);
115de2362d3Smrg    E32(0x80000000);
116de2362d3Smrg    E32(0x80000000);
117de2362d3Smrg    END_BATCH();
118de2362d3Smrg
119de2362d3Smrg}
120de2362d3Smrg
121de2362d3Smrg/*
122de2362d3Smrg * Setup of functional groups
123de2362d3Smrg */
124de2362d3Smrg
125de2362d3Smrg// asic stack/thread/gpr limits - need to query the drm
126de2362d3Smrgstatic void
127de2362d3Smrgr600_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
128de2362d3Smrg{
129de2362d3Smrg    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
130de2362d3Smrg    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
131de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
132de2362d3Smrg
133de2362d3Smrg    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
134de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RV620) ||
135de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RS780) ||
136de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RS880) ||
137de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RV710))
138de2362d3Smrg	sq_config = 0;						// no VC
139de2362d3Smrg    else
140de2362d3Smrg	sq_config = VC_ENABLE_bit;
141de2362d3Smrg
142de2362d3Smrg    sq_config |= (DX9_CONSTS_bit |
143de2362d3Smrg		  ALU_INST_PREFER_VECTOR_bit |
144de2362d3Smrg		  (sq_conf->ps_prio << PS_PRIO_shift) |
145de2362d3Smrg		  (sq_conf->vs_prio << VS_PRIO_shift) |
146de2362d3Smrg		  (sq_conf->gs_prio << GS_PRIO_shift) |
147de2362d3Smrg		  (sq_conf->es_prio << ES_PRIO_shift));
148de2362d3Smrg
149de2362d3Smrg    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
150de2362d3Smrg			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
151de2362d3Smrg			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
152de2362d3Smrg    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
153de2362d3Smrg			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
154de2362d3Smrg
155de2362d3Smrg    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
156de2362d3Smrg			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
157de2362d3Smrg			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
158de2362d3Smrg			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
159de2362d3Smrg
160de2362d3Smrg    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
161de2362d3Smrg				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
162de2362d3Smrg
163de2362d3Smrg    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
164de2362d3Smrg				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
165de2362d3Smrg
166de2362d3Smrg    BEGIN_BATCH(8);
167de2362d3Smrg    PACK0(SQ_CONFIG, 6);
168de2362d3Smrg    E32(sq_config);
169de2362d3Smrg    E32(sq_gpr_resource_mgmt_1);
170de2362d3Smrg    E32(sq_gpr_resource_mgmt_2);
171de2362d3Smrg    E32(sq_thread_resource_mgmt);
172de2362d3Smrg    E32(sq_stack_resource_mgmt_1);
173de2362d3Smrg    E32(sq_stack_resource_mgmt_2);
174de2362d3Smrg    END_BATCH();
175de2362d3Smrg}
176de2362d3Smrg
177de2362d3Smrgvoid
178de2362d3Smrgr600_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
179de2362d3Smrg{
180de2362d3Smrg    uint32_t cb_color_info, cb_color_control;
181de2362d3Smrg    unsigned pitch, slice, h, array_mode;
182de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
183de2362d3Smrg
184de2362d3Smrg
185de2362d3Smrg    if (cb_conf->surface) {
186de2362d3Smrg	switch (cb_conf->surface->level[0].mode) {
187de2362d3Smrg	case RADEON_SURF_MODE_1D:
188de2362d3Smrg		array_mode = 2;
189de2362d3Smrg		break;
190de2362d3Smrg	case RADEON_SURF_MODE_2D:
191de2362d3Smrg		array_mode = 4;
192de2362d3Smrg		break;
193de2362d3Smrg	default:
194de2362d3Smrg		array_mode = 0;
195de2362d3Smrg		break;
196de2362d3Smrg	}
197de2362d3Smrg	pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
198de2362d3Smrg	slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
199de2362d3Smrg    } else
200de2362d3Smrg    {
201de2362d3Smrg	array_mode = cb_conf->array_mode;
202de2362d3Smrg	pitch = (cb_conf->w / 8) - 1;
203de2362d3Smrg	h = RADEON_ALIGN(cb_conf->h, 8);
204de2362d3Smrg	slice = ((cb_conf->w * h) / 64) - 1;
205de2362d3Smrg    }
206de2362d3Smrg
207de2362d3Smrg    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
208de2362d3Smrg		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
209de2362d3Smrg		     (array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
210de2362d3Smrg		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
211de2362d3Smrg		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
212de2362d3Smrg		     (cb_conf->tile_mode   << CB_COLOR0_INFO__TILE_MODE_shift));
213de2362d3Smrg    if (cb_conf->read_size)
214de2362d3Smrg	cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
215de2362d3Smrg    if (cb_conf->blend_clamp)
216de2362d3Smrg	cb_color_info |= BLEND_CLAMP_bit;
217de2362d3Smrg    if (cb_conf->clear_color)
218de2362d3Smrg	cb_color_info |= CLEAR_COLOR_bit;
219de2362d3Smrg    if (cb_conf->blend_bypass)
220de2362d3Smrg	cb_color_info |= BLEND_BYPASS_bit;
221de2362d3Smrg    if (cb_conf->blend_float32)
222de2362d3Smrg	cb_color_info |= BLEND_FLOAT32_bit;
223de2362d3Smrg    if (cb_conf->simple_float)
224de2362d3Smrg	cb_color_info |= SIMPLE_FLOAT_bit;
225de2362d3Smrg    if (cb_conf->round_mode)
226de2362d3Smrg	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
227de2362d3Smrg    if (cb_conf->tile_compact)
228de2362d3Smrg	cb_color_info |= TILE_COMPACT_bit;
229de2362d3Smrg    if (cb_conf->source_format)
230de2362d3Smrg	cb_color_info |= SOURCE_FORMAT_bit;
231de2362d3Smrg
232de2362d3Smrg    BEGIN_BATCH(3 + 2);
233de2362d3Smrg    EREG((CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
234de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
235de2362d3Smrg    END_BATCH();
236de2362d3Smrg
237de2362d3Smrg    // rv6xx workaround
238de2362d3Smrg    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
239de2362d3Smrg        (info->ChipFamily < CHIP_FAMILY_RV770)) {
240de2362d3Smrg        BEGIN_BATCH(2);
241de2362d3Smrg        PACK3(IT_SURFACE_BASE_UPDATE, 1);
242de2362d3Smrg        E32((2 << cb_conf->id));
243de2362d3Smrg        END_BATCH();
244de2362d3Smrg    }
245de2362d3Smrg    /* Set CMASK & TILE buffer to the offset of color buffer as
246de2362d3Smrg     * we don't use those this shouldn't cause any issue and we
247de2362d3Smrg     * then have a valid cmd stream
248de2362d3Smrg     */
249de2362d3Smrg    BEGIN_BATCH(3 + 2);
250de2362d3Smrg    EREG((CB_COLOR0_TILE + (4 * cb_conf->id)), (0     >> 8));	// CMASK per-tile data base/256
251de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
252de2362d3Smrg    END_BATCH();
253de2362d3Smrg    BEGIN_BATCH(3 + 2);
254de2362d3Smrg    EREG((CB_COLOR0_FRAG + (4 * cb_conf->id)), (0     >> 8));	// FMASK per-tile data base/256
255de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
256de2362d3Smrg    END_BATCH();
257de2362d3Smrg    BEGIN_BATCH(9);
258de2362d3Smrg    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
259de2362d3Smrg    EREG((CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift)	|
260de2362d3Smrg						    (slice << SLICE_TILE_MAX_shift)));
261de2362d3Smrg    EREG((CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0    << SLICE_START_shift)		|
262de2362d3Smrg						    (0    << SLICE_MAX_shift)));
263de2362d3Smrg    EREG((CB_COLOR0_MASK + (4 * cb_conf->id)), ((0    << CMASK_BLOCK_MAX_shift)	|
264de2362d3Smrg						    (0    << FMASK_TILE_MAX_shift)));
265de2362d3Smrg    END_BATCH();
266de2362d3Smrg
267de2362d3Smrg    BEGIN_BATCH(3 + 2);
268de2362d3Smrg    EREG((CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
269de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
270de2362d3Smrg    END_BATCH();
271de2362d3Smrg
272de2362d3Smrg    BEGIN_BATCH(9);
273de2362d3Smrg    EREG(CB_TARGET_MASK,          (cb_conf->pmask << TARGET0_ENABLE_shift));
274de2362d3Smrg    cb_color_control = R600_ROP[cb_conf->rop] |
275de2362d3Smrg	(cb_conf->blend_enable << TARGET_BLEND_ENABLE_shift);
276de2362d3Smrg    if (info->ChipFamily == CHIP_FAMILY_R600) {
277de2362d3Smrg	/* no per-MRT blend on R600 */
278de2362d3Smrg	EREG(CB_COLOR_CONTROL,    cb_color_control);
279de2362d3Smrg	EREG(CB_BLEND_CONTROL,    cb_conf->blendcntl);
280de2362d3Smrg    } else {
281de2362d3Smrg	if (cb_conf->blend_enable)
282de2362d3Smrg	    cb_color_control |= PER_MRT_BLEND_bit;
283de2362d3Smrg	EREG(CB_COLOR_CONTROL,    cb_color_control);
284de2362d3Smrg	EREG(CB_BLEND0_CONTROL,   cb_conf->blendcntl);
285de2362d3Smrg    }
286de2362d3Smrg    END_BATCH();
287de2362d3Smrg}
288de2362d3Smrg
289de2362d3Smrgstatic void
290de2362d3Smrgr600_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
291de2362d3Smrg			 uint32_t size, uint64_t mc_addr,
292de2362d3Smrg			 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
293de2362d3Smrg{
294de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
295de2362d3Smrg    uint32_t cp_coher_size;
296de2362d3Smrg    if (size == 0xffffffff)
297de2362d3Smrg	cp_coher_size = 0xffffffff;
298de2362d3Smrg    else
299de2362d3Smrg	cp_coher_size = ((size + 255) >> 8);
300de2362d3Smrg
301de2362d3Smrg    BEGIN_BATCH(5 + 2);
302de2362d3Smrg    PACK3(IT_SURFACE_SYNC, 4);
303de2362d3Smrg    E32(sync_type);
304de2362d3Smrg    E32(cp_coher_size);
305de2362d3Smrg    E32((mc_addr >> 8));
306de2362d3Smrg    E32(10); /* poll interval */
307de2362d3Smrg    RELOC_BATCH(bo, rdomains, wdomain);
308de2362d3Smrg    END_BATCH();
309de2362d3Smrg}
310de2362d3Smrg
311de2362d3Smrg/* inserts a wait for vline in the command stream */
312de2362d3Smrgvoid
313de2362d3Smrgr600_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
314de2362d3Smrg			xf86CrtcPtr crtc, int start, int stop)
315de2362d3Smrg{
316de2362d3Smrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
317de2362d3Smrg    drmmode_crtc_private_ptr drmmode_crtc;
318de2362d3Smrg
319de2362d3Smrg    if (!crtc)
320de2362d3Smrg        return;
321de2362d3Smrg
322de2362d3Smrg    if (!crtc->enabled)
323de2362d3Smrg        return;
324de2362d3Smrg
325de2362d3Smrg    if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
326de2362d3Smrg        return;
327de2362d3Smrg
328de2362d3Smrg    start = max(start, crtc->y);
329de2362d3Smrg    stop = min(stop, crtc->y + crtc->mode.VDisplay);
330de2362d3Smrg
331de2362d3Smrg    if (start >= stop)
332de2362d3Smrg        return;
333de2362d3Smrg
334de2362d3Smrg    drmmode_crtc = crtc->driver_private;
335de2362d3Smrg
336de2362d3Smrg    BEGIN_BATCH(11);
337de2362d3Smrg    /* set the VLINE range */
338de2362d3Smrg    EREG(AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */
339de2362d3Smrg	 (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
340de2362d3Smrg	 (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
341de2362d3Smrg
342de2362d3Smrg    /* tell the CP to poll the VLINE state register */
343de2362d3Smrg    PACK3(IT_WAIT_REG_MEM, 6);
344de2362d3Smrg    E32(IT_WAIT_REG | IT_WAIT_EQ);
345de2362d3Smrg    E32(IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS));
346de2362d3Smrg    E32(0);
347de2362d3Smrg    E32(0);                          // Ref value
348de2362d3Smrg    E32(AVIVO_D1MODE_VLINE_STAT);    // Mask
349de2362d3Smrg    E32(10);                         // Wait interval
350de2362d3Smrg    /* add crtc reloc */
351de2362d3Smrg    PACK3(IT_NOP, 1);
352de2362d3Smrg    E32(drmmode_crtc->mode_crtc->crtc_id);
353de2362d3Smrg    END_BATCH();
354de2362d3Smrg}
355de2362d3Smrg
356de2362d3Smrgvoid
357de2362d3Smrgr600_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
358de2362d3Smrg{
359de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
360de2362d3Smrg
361de2362d3Smrg    BEGIN_BATCH(8);
362de2362d3Smrg    /* Interpolator setup */
363de2362d3Smrg    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
364de2362d3Smrg    PACK0(SPI_PS_IN_CONTROL_0, 3);
365de2362d3Smrg    E32((num_interp << NUM_INTERP_shift));
366de2362d3Smrg    E32(0);
367de2362d3Smrg    E32(0);
368de2362d3Smrg    END_BATCH();
369de2362d3Smrg}
370de2362d3Smrg
371de2362d3Smrgvoid
372de2362d3Smrgr600_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
373de2362d3Smrg{
374de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
375de2362d3Smrg    uint32_t sq_pgm_resources;
376de2362d3Smrg
377de2362d3Smrg    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
378de2362d3Smrg			(fs_conf->stack_size << STACK_SIZE_shift));
379de2362d3Smrg
380de2362d3Smrg    if (fs_conf->dx10_clamp)
381de2362d3Smrg	sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;
382de2362d3Smrg
383de2362d3Smrg    BEGIN_BATCH(3 + 2);
384de2362d3Smrg    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
385de2362d3Smrg    RELOC_BATCH(fs_conf->bo, domain, 0);
386de2362d3Smrg    END_BATCH();
387de2362d3Smrg
388de2362d3Smrg    BEGIN_BATCH(6);
389de2362d3Smrg    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
390de2362d3Smrg    EREG(SQ_PGM_CF_OFFSET_FS, 0);
391de2362d3Smrg    END_BATCH();
392de2362d3Smrg}
393de2362d3Smrg
394de2362d3Smrgvoid
395de2362d3Smrgr600_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
396de2362d3Smrg{
397de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
398de2362d3Smrg    uint32_t sq_pgm_resources;
399de2362d3Smrg
400de2362d3Smrg    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
401de2362d3Smrg			(vs_conf->stack_size << STACK_SIZE_shift));
402de2362d3Smrg
403de2362d3Smrg    if (vs_conf->dx10_clamp)
404de2362d3Smrg	sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
405de2362d3Smrg    if (vs_conf->fetch_cache_lines)
406de2362d3Smrg	sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
407de2362d3Smrg    if (vs_conf->uncached_first_inst)
408de2362d3Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
409de2362d3Smrg
410de2362d3Smrg    /* flush SQ cache */
411de2362d3Smrg    r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
412de2362d3Smrg			     vs_conf->shader_size, vs_conf->shader_addr,
413de2362d3Smrg			     vs_conf->bo, domain, 0);
414de2362d3Smrg
415de2362d3Smrg    BEGIN_BATCH(3 + 2);
416de2362d3Smrg    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
417de2362d3Smrg    RELOC_BATCH(vs_conf->bo, domain, 0);
418de2362d3Smrg    END_BATCH();
419de2362d3Smrg
420de2362d3Smrg    BEGIN_BATCH(6);
421de2362d3Smrg    EREG(SQ_PGM_RESOURCES_VS, sq_pgm_resources);
422de2362d3Smrg    EREG(SQ_PGM_CF_OFFSET_VS, 0);
423de2362d3Smrg    END_BATCH();
424de2362d3Smrg}
425de2362d3Smrg
426de2362d3Smrgvoid
427de2362d3Smrgr600_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
428de2362d3Smrg{
429de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
430de2362d3Smrg    uint32_t sq_pgm_resources;
431de2362d3Smrg
432de2362d3Smrg    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
433de2362d3Smrg			(ps_conf->stack_size << STACK_SIZE_shift));
434de2362d3Smrg
435de2362d3Smrg    if (ps_conf->dx10_clamp)
436de2362d3Smrg	sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
437de2362d3Smrg    if (ps_conf->fetch_cache_lines)
438de2362d3Smrg	sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
439de2362d3Smrg    if (ps_conf->uncached_first_inst)
440de2362d3Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
441de2362d3Smrg    if (ps_conf->clamp_consts)
442de2362d3Smrg	sq_pgm_resources |= CLAMP_CONSTS_bit;
443de2362d3Smrg
444de2362d3Smrg    /* flush SQ cache */
445de2362d3Smrg    r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
446de2362d3Smrg			     ps_conf->shader_size, ps_conf->shader_addr,
447de2362d3Smrg			     ps_conf->bo, domain, 0);
448de2362d3Smrg
449de2362d3Smrg    BEGIN_BATCH(3 + 2);
450de2362d3Smrg    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
451de2362d3Smrg    RELOC_BATCH(ps_conf->bo, domain, 0);
452de2362d3Smrg    END_BATCH();
453de2362d3Smrg
454de2362d3Smrg    BEGIN_BATCH(9);
455de2362d3Smrg    EREG(SQ_PGM_RESOURCES_PS, sq_pgm_resources);
456de2362d3Smrg    EREG(SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
457de2362d3Smrg    EREG(SQ_PGM_CF_OFFSET_PS, 0);
458de2362d3Smrg    END_BATCH();
459de2362d3Smrg}
460de2362d3Smrg
461de2362d3Smrgvoid
462de2362d3Smrgr600_set_alu_consts(ScrnInfoPtr pScrn, int offset, int count, float *const_buf)
463de2362d3Smrg{
464de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
465de2362d3Smrg    int i;
466de2362d3Smrg    const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2);
467de2362d3Smrg
468de2362d3Smrg    BEGIN_BATCH(2 + countreg);
469de2362d3Smrg    PACK0(SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg);
470de2362d3Smrg    for (i = 0; i < countreg; i++)
471de2362d3Smrg	EFLOAT(const_buf[i]);
472de2362d3Smrg    END_BATCH();
473de2362d3Smrg}
474de2362d3Smrg
475de2362d3Smrgvoid
476de2362d3Smrgr600_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
477de2362d3Smrg{
478de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
479de2362d3Smrg    /* bool register order is: ps, vs, gs; one register each
480de2362d3Smrg     * 1 bits per bool; 32 bools each for ps, vs, gs.
481de2362d3Smrg     */
482de2362d3Smrg    BEGIN_BATCH(3);
483de2362d3Smrg    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
484de2362d3Smrg    END_BATCH();
485de2362d3Smrg}
486de2362d3Smrg
487de2362d3Smrgstatic void
488de2362d3Smrgr600_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
489de2362d3Smrg{
490de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
491de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
492de2362d3Smrg    uint32_t sq_vtx_constant_word2;
493de2362d3Smrg
494de2362d3Smrg    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
495de2362d3Smrg			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
496de2362d3Smrg			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
497de2362d3Smrg			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
498de2362d3Smrg			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
499de2362d3Smrg    if (res->clamp_x)
500de2362d3Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
501de2362d3Smrg
502de2362d3Smrg    if (res->format_comp_all)
503de2362d3Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
504de2362d3Smrg
505de2362d3Smrg    if (res->srf_mode_all)
506de2362d3Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
507de2362d3Smrg
508de2362d3Smrg    /* flush vertex cache */
509de2362d3Smrg    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
510de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RV620) ||
511de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RS780) ||
512de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RS880) ||
513de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RV710))
514de2362d3Smrg	r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
515de2362d3Smrg				 accel_state->vbo.vb_offset, 0,
516de2362d3Smrg				 res->bo,
517de2362d3Smrg				 domain, 0);
518de2362d3Smrg    else
519de2362d3Smrg	r600_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
520de2362d3Smrg				 accel_state->vbo.vb_offset, 0,
521de2362d3Smrg				 res->bo,
522de2362d3Smrg				 domain, 0);
523de2362d3Smrg
524de2362d3Smrg    BEGIN_BATCH(9 + 2);
525de2362d3Smrg    PACK0(SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
526de2362d3Smrg    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
527de2362d3Smrg    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
528de2362d3Smrg    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
529de2362d3Smrg    E32(res->mem_req_size << MEM_REQUEST_SIZE_shift);		// 3: MEM_REQUEST_SIZE ?!?
530de2362d3Smrg    E32(0);							// 4: n/a
531de2362d3Smrg    E32(0);							// 5: n/a
532de2362d3Smrg    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift);	// 6: TYPE
533de2362d3Smrg    RELOC_BATCH(res->bo, domain, 0);
534de2362d3Smrg    END_BATCH();
535de2362d3Smrg}
536de2362d3Smrg
537de2362d3Smrgvoid
538de2362d3Smrgr600_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
539de2362d3Smrg{
540de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
541de2362d3Smrg    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
542de2362d3Smrg    uint32_t sq_tex_resource_word5, sq_tex_resource_word6;
543de2362d3Smrg    uint32_t array_mode, pitch;
544de2362d3Smrg
545de2362d3Smrg    if (tex_res->surface) {
546de2362d3Smrg	switch (tex_res->surface->level[0].mode) {
547de2362d3Smrg	case RADEON_SURF_MODE_1D:
548de2362d3Smrg		array_mode = 2;
549de2362d3Smrg		break;
550de2362d3Smrg	case RADEON_SURF_MODE_2D:
551de2362d3Smrg		array_mode = 4;
552de2362d3Smrg		break;
553de2362d3Smrg	default:
554de2362d3Smrg		array_mode = 0;
555de2362d3Smrg		break;
556de2362d3Smrg	}
557de2362d3Smrg	pitch = tex_res->surface->level[0].nblk_x >> 3;
558de2362d3Smrg    } else
559de2362d3Smrg    {
560de2362d3Smrg	array_mode = tex_res->tile_mode;
561de2362d3Smrg	pitch = (tex_res->pitch + 7) >> 3;
562de2362d3Smrg    }
563de2362d3Smrg
564de2362d3Smrg    sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
565de2362d3Smrg		     (array_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));
566de2362d3Smrg
567de2362d3Smrg    if (tex_res->w)
568de2362d3Smrg	sq_tex_resource_word0 |= (((pitch - 1) << PITCH_shift) |
569de2362d3Smrg				  ((tex_res->w - 1) << TEX_WIDTH_shift));
570de2362d3Smrg
571de2362d3Smrg    if (tex_res->tile_type)
572de2362d3Smrg	sq_tex_resource_word0 |= TILE_TYPE_bit;
573de2362d3Smrg
574de2362d3Smrg    sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);
575de2362d3Smrg
576de2362d3Smrg    if (tex_res->h)
577de2362d3Smrg	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
578de2362d3Smrg    if (tex_res->depth)
579de2362d3Smrg	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
580de2362d3Smrg
581de2362d3Smrg    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
582de2362d3Smrg			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
583de2362d3Smrg			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
584de2362d3Smrg			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
585de2362d3Smrg			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
586de2362d3Smrg			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
587de2362d3Smrg			     (tex_res->request_size << REQUEST_SIZE_shift) |
588de2362d3Smrg			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
589de2362d3Smrg			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
590de2362d3Smrg			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
591de2362d3Smrg			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
592de2362d3Smrg			     (tex_res->base_level << BASE_LEVEL_shift));
593de2362d3Smrg
594de2362d3Smrg    if (tex_res->srf_mode_all)
595de2362d3Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
596de2362d3Smrg    if (tex_res->force_degamma)
597de2362d3Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
598de2362d3Smrg
599de2362d3Smrg    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
600de2362d3Smrg			     (tex_res->base_array << BASE_ARRAY_shift) |
601de2362d3Smrg			     (tex_res->last_array << LAST_ARRAY_shift));
602de2362d3Smrg
603de2362d3Smrg    sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
604de2362d3Smrg			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
605de2362d3Smrg			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));
606de2362d3Smrg
607de2362d3Smrg    if (tex_res->interlaced)
608de2362d3Smrg	sq_tex_resource_word6 |= INTERLACED_bit;
609de2362d3Smrg
610de2362d3Smrg    /* flush texture cache */
611de2362d3Smrg    r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
612de2362d3Smrg			     tex_res->size, tex_res->base,
613de2362d3Smrg			     tex_res->bo, domain, 0);
614de2362d3Smrg
615de2362d3Smrg    BEGIN_BATCH(9 + 4);
616de2362d3Smrg    PACK0(SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
617de2362d3Smrg    E32(sq_tex_resource_word0);
618de2362d3Smrg    E32(sq_tex_resource_word1);
619de2362d3Smrg    E32(((tex_res->base) >> 8));
620de2362d3Smrg    E32(((tex_res->mip_base) >> 8));
621de2362d3Smrg    E32(sq_tex_resource_word4);
622de2362d3Smrg    E32(sq_tex_resource_word5);
623de2362d3Smrg    E32(sq_tex_resource_word6);
624de2362d3Smrg    RELOC_BATCH(tex_res->bo, domain, 0);
625de2362d3Smrg    RELOC_BATCH(tex_res->mip_bo, domain, 0);
626de2362d3Smrg    END_BATCH();
627de2362d3Smrg}
628de2362d3Smrg
629de2362d3Smrgvoid
630de2362d3Smrgr600_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
631de2362d3Smrg{
632de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
633de2362d3Smrg    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
634de2362d3Smrg
635de2362d3Smrg    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
636de2362d3Smrg			    (s->clamp_y       << CLAMP_Y_shift)					|
637de2362d3Smrg			    (s->clamp_z       << CLAMP_Z_shift)					|
638de2362d3Smrg			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
639de2362d3Smrg			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
640de2362d3Smrg			    (s->z_filter      << Z_FILTER_shift)	|
641de2362d3Smrg			    (s->mip_filter    << MIP_FILTER_shift)				|
642de2362d3Smrg			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
643de2362d3Smrg			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
644de2362d3Smrg			    (s->chroma_key    << CHROMA_KEY_shift));
645de2362d3Smrg    if (s->point_sampling_clamp)
646de2362d3Smrg	sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
647de2362d3Smrg    if (s->tex_array_override)
648de2362d3Smrg	sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
649de2362d3Smrg    if (s->lod_uses_minor_axis)
650de2362d3Smrg	sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;
651de2362d3Smrg
652de2362d3Smrg    sq_tex_sampler_word1 = ((s->min_lod       << MIN_LOD_shift)					|
653de2362d3Smrg			    (s->max_lod       << MAX_LOD_shift)					|
654de2362d3Smrg			    (s->lod_bias      << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));
655de2362d3Smrg
656de2362d3Smrg    sq_tex_sampler_word2 = ((s->lod_bias2     << LOD_BIAS_SEC_shift)	|
657de2362d3Smrg			    (s->perf_mip      << PERF_MIP_shift)	|
658de2362d3Smrg			    (s->perf_z        << PERF_Z_shift));
659de2362d3Smrg    if (s->mc_coord_truncate)
660de2362d3Smrg	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
661de2362d3Smrg    if (s->force_degamma)
662de2362d3Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
663de2362d3Smrg    if (s->high_precision_filter)
664de2362d3Smrg	sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
665de2362d3Smrg    if (s->fetch_4)
666de2362d3Smrg	sq_tex_sampler_word2 |= FETCH_4_bit;
667de2362d3Smrg    if (s->sample_is_pcf)
668de2362d3Smrg	sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
669de2362d3Smrg    if (s->type)
670de2362d3Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
671de2362d3Smrg
672de2362d3Smrg    BEGIN_BATCH(5);
673de2362d3Smrg    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
674de2362d3Smrg    E32(sq_tex_sampler_word0);
675de2362d3Smrg    E32(sq_tex_sampler_word1);
676de2362d3Smrg    E32(sq_tex_sampler_word2);
677de2362d3Smrg    END_BATCH();
678de2362d3Smrg}
679de2362d3Smrg
680de2362d3Smrg//XXX deal with clip offsets in clip setup
681de2362d3Smrgvoid
682de2362d3Smrgr600_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
683de2362d3Smrg{
684de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
685de2362d3Smrg
686de2362d3Smrg    BEGIN_BATCH(4);
687de2362d3Smrg    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
688de2362d3Smrg    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
689de2362d3Smrg	     (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
690de2362d3Smrg    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
691de2362d3Smrg	     (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
692de2362d3Smrg    END_BATCH();
693de2362d3Smrg}
694de2362d3Smrg
695de2362d3Smrgvoid
696de2362d3Smrgr600_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
697de2362d3Smrg{
698de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
699de2362d3Smrg
700de2362d3Smrg    BEGIN_BATCH(4);
701de2362d3Smrg    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
702de2362d3Smrg    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
703de2362d3Smrg	     (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
704de2362d3Smrg	     WINDOW_OFFSET_DISABLE_bit));
705de2362d3Smrg    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
706de2362d3Smrg	     (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
707de2362d3Smrg    END_BATCH();
708de2362d3Smrg}
709de2362d3Smrg
710de2362d3Smrgvoid
711de2362d3Smrgr600_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
712de2362d3Smrg{
713de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
714de2362d3Smrg
715de2362d3Smrg    BEGIN_BATCH(4);
716de2362d3Smrg    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
717de2362d3Smrg    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
718de2362d3Smrg	     (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
719de2362d3Smrg	     WINDOW_OFFSET_DISABLE_bit));
720de2362d3Smrg    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
721de2362d3Smrg	     (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
722de2362d3Smrg    END_BATCH();
723de2362d3Smrg}
724de2362d3Smrg
725de2362d3Smrgvoid
726de2362d3Smrgr600_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
727de2362d3Smrg{
728de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
729de2362d3Smrg
730de2362d3Smrg    BEGIN_BATCH(4);
731de2362d3Smrg    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
732de2362d3Smrg    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
733de2362d3Smrg	     (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
734de2362d3Smrg	     WINDOW_OFFSET_DISABLE_bit));
735de2362d3Smrg    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
736de2362d3Smrg	      (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
737de2362d3Smrg    END_BATCH();
738de2362d3Smrg}
739de2362d3Smrg
740de2362d3Smrgvoid
741de2362d3Smrgr600_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
742de2362d3Smrg{
743de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
744de2362d3Smrg
745de2362d3Smrg    BEGIN_BATCH(4);
746de2362d3Smrg    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
747de2362d3Smrg    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
748de2362d3Smrg	     (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
749de2362d3Smrg    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
750de2362d3Smrg	     (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
751de2362d3Smrg    END_BATCH();
752de2362d3Smrg}
753de2362d3Smrg
754de2362d3Smrg/*
755de2362d3Smrg * Setup of default state
756de2362d3Smrg */
757de2362d3Smrg
758de2362d3Smrgvoid
759de2362d3Smrgr600_set_default_state(ScrnInfoPtr pScrn)
760de2362d3Smrg{
761de2362d3Smrg    tex_resource_t tex_res;
762de2362d3Smrg    shader_config_t fs_conf;
763de2362d3Smrg    sq_config_t sq_conf;
764de2362d3Smrg    int i;
765de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
766de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
767de2362d3Smrg
768de2362d3Smrg    if (accel_state->XInited3D)
769de2362d3Smrg	return;
770de2362d3Smrg
771de2362d3Smrg    memset(&tex_res, 0, sizeof(tex_resource_t));
772de2362d3Smrg    memset(&fs_conf, 0, sizeof(shader_config_t));
773de2362d3Smrg
774de2362d3Smrg    accel_state->XInited3D = TRUE;
775de2362d3Smrg
776de2362d3Smrg    r600_start_3d(pScrn);
777de2362d3Smrg
778de2362d3Smrg    // SQ
779de2362d3Smrg    sq_conf.ps_prio = 0;
780de2362d3Smrg    sq_conf.vs_prio = 1;
781de2362d3Smrg    sq_conf.gs_prio = 2;
782de2362d3Smrg    sq_conf.es_prio = 3;
783de2362d3Smrg    // need to set stack/thread/gpr limits based on the asic
784de2362d3Smrg    // for now just set them low enough so any card will work
785de2362d3Smrg    // see r600_cp.c in the drm
786de2362d3Smrg    switch (info->ChipFamily) {
787de2362d3Smrg    case CHIP_FAMILY_R600:
788de2362d3Smrg	sq_conf.num_ps_gprs = 192;
789de2362d3Smrg	sq_conf.num_vs_gprs = 56;
790de2362d3Smrg	sq_conf.num_temp_gprs = 4;
791de2362d3Smrg	sq_conf.num_gs_gprs = 0;
792de2362d3Smrg	sq_conf.num_es_gprs = 0;
793de2362d3Smrg	sq_conf.num_ps_threads = 136;
794de2362d3Smrg	sq_conf.num_vs_threads = 48;
795de2362d3Smrg	sq_conf.num_gs_threads = 4;
796de2362d3Smrg	sq_conf.num_es_threads = 4;
797de2362d3Smrg	sq_conf.num_ps_stack_entries = 128;
798de2362d3Smrg	sq_conf.num_vs_stack_entries = 128;
799de2362d3Smrg	sq_conf.num_gs_stack_entries = 0;
800de2362d3Smrg	sq_conf.num_es_stack_entries = 0;
801de2362d3Smrg	break;
802de2362d3Smrg    case CHIP_FAMILY_RV630:
803de2362d3Smrg    case CHIP_FAMILY_RV635:
804de2362d3Smrg	sq_conf.num_ps_gprs = 84;
805de2362d3Smrg	sq_conf.num_vs_gprs = 36;
806de2362d3Smrg	sq_conf.num_temp_gprs = 4;
807de2362d3Smrg	sq_conf.num_gs_gprs = 0;
808de2362d3Smrg	sq_conf.num_es_gprs = 0;
809de2362d3Smrg	sq_conf.num_ps_threads = 144;
810de2362d3Smrg	sq_conf.num_vs_threads = 40;
811de2362d3Smrg	sq_conf.num_gs_threads = 4;
812de2362d3Smrg	sq_conf.num_es_threads = 4;
813de2362d3Smrg	sq_conf.num_ps_stack_entries = 40;
814de2362d3Smrg	sq_conf.num_vs_stack_entries = 40;
815de2362d3Smrg	sq_conf.num_gs_stack_entries = 32;
816de2362d3Smrg	sq_conf.num_es_stack_entries = 16;
817de2362d3Smrg	break;
818de2362d3Smrg    case CHIP_FAMILY_RV610:
819de2362d3Smrg    case CHIP_FAMILY_RV620:
820de2362d3Smrg    case CHIP_FAMILY_RS780:
821de2362d3Smrg    case CHIP_FAMILY_RS880:
822de2362d3Smrg    default:
823de2362d3Smrg	sq_conf.num_ps_gprs = 84;
824de2362d3Smrg	sq_conf.num_vs_gprs = 36;
825de2362d3Smrg	sq_conf.num_temp_gprs = 4;
826de2362d3Smrg	sq_conf.num_gs_gprs = 0;
827de2362d3Smrg	sq_conf.num_es_gprs = 0;
828de2362d3Smrg	sq_conf.num_ps_threads = 136;
829de2362d3Smrg	sq_conf.num_vs_threads = 48;
830de2362d3Smrg	sq_conf.num_gs_threads = 4;
831de2362d3Smrg	sq_conf.num_es_threads = 4;
832de2362d3Smrg	sq_conf.num_ps_stack_entries = 40;
833de2362d3Smrg	sq_conf.num_vs_stack_entries = 40;
834de2362d3Smrg	sq_conf.num_gs_stack_entries = 32;
835de2362d3Smrg	sq_conf.num_es_stack_entries = 16;
836de2362d3Smrg	break;
837de2362d3Smrg    case CHIP_FAMILY_RV670:
838de2362d3Smrg	sq_conf.num_ps_gprs = 144;
839de2362d3Smrg	sq_conf.num_vs_gprs = 40;
840de2362d3Smrg	sq_conf.num_temp_gprs = 4;
841de2362d3Smrg	sq_conf.num_gs_gprs = 0;
842de2362d3Smrg	sq_conf.num_es_gprs = 0;
843de2362d3Smrg	sq_conf.num_ps_threads = 136;
844de2362d3Smrg	sq_conf.num_vs_threads = 48;
845de2362d3Smrg	sq_conf.num_gs_threads = 4;
846de2362d3Smrg	sq_conf.num_es_threads = 4;
847de2362d3Smrg	sq_conf.num_ps_stack_entries = 40;
848de2362d3Smrg	sq_conf.num_vs_stack_entries = 40;
849de2362d3Smrg	sq_conf.num_gs_stack_entries = 32;
850de2362d3Smrg	sq_conf.num_es_stack_entries = 16;
851de2362d3Smrg	break;
852de2362d3Smrg    case CHIP_FAMILY_RV770:
853de2362d3Smrg	sq_conf.num_ps_gprs = 192;
854de2362d3Smrg	sq_conf.num_vs_gprs = 56;
855de2362d3Smrg	sq_conf.num_temp_gprs = 4;
856de2362d3Smrg	sq_conf.num_gs_gprs = 0;
857de2362d3Smrg	sq_conf.num_es_gprs = 0;
858de2362d3Smrg	sq_conf.num_ps_threads = 188;
859de2362d3Smrg	sq_conf.num_vs_threads = 60;
860de2362d3Smrg	sq_conf.num_gs_threads = 0;
861de2362d3Smrg	sq_conf.num_es_threads = 0;
862de2362d3Smrg	sq_conf.num_ps_stack_entries = 256;
863de2362d3Smrg	sq_conf.num_vs_stack_entries = 256;
864de2362d3Smrg	sq_conf.num_gs_stack_entries = 0;
865de2362d3Smrg	sq_conf.num_es_stack_entries = 0;
866de2362d3Smrg	break;
867de2362d3Smrg    case CHIP_FAMILY_RV730:
868de2362d3Smrg    case CHIP_FAMILY_RV740:
869de2362d3Smrg	sq_conf.num_ps_gprs = 84;
870de2362d3Smrg	sq_conf.num_vs_gprs = 36;
871de2362d3Smrg	sq_conf.num_temp_gprs = 4;
872de2362d3Smrg	sq_conf.num_gs_gprs = 0;
873de2362d3Smrg	sq_conf.num_es_gprs = 0;
874de2362d3Smrg	sq_conf.num_ps_threads = 188;
875de2362d3Smrg	sq_conf.num_vs_threads = 60;
876de2362d3Smrg	sq_conf.num_gs_threads = 0;
877de2362d3Smrg	sq_conf.num_es_threads = 0;
878de2362d3Smrg	sq_conf.num_ps_stack_entries = 128;
879de2362d3Smrg	sq_conf.num_vs_stack_entries = 128;
880de2362d3Smrg	sq_conf.num_gs_stack_entries = 0;
881de2362d3Smrg	sq_conf.num_es_stack_entries = 0;
882de2362d3Smrg	break;
883de2362d3Smrg    case CHIP_FAMILY_RV710:
884de2362d3Smrg	sq_conf.num_ps_gprs = 192;
885de2362d3Smrg	sq_conf.num_vs_gprs = 56;
886de2362d3Smrg	sq_conf.num_temp_gprs = 4;
887de2362d3Smrg	sq_conf.num_gs_gprs = 0;
888de2362d3Smrg	sq_conf.num_es_gprs = 0;
889de2362d3Smrg	sq_conf.num_ps_threads = 144;
890de2362d3Smrg	sq_conf.num_vs_threads = 48;
891de2362d3Smrg	sq_conf.num_gs_threads = 0;
892de2362d3Smrg	sq_conf.num_es_threads = 0;
893de2362d3Smrg	sq_conf.num_ps_stack_entries = 128;
894de2362d3Smrg	sq_conf.num_vs_stack_entries = 128;
895de2362d3Smrg	sq_conf.num_gs_stack_entries = 0;
896de2362d3Smrg	sq_conf.num_es_stack_entries = 0;
897de2362d3Smrg	break;
898de2362d3Smrg    }
899de2362d3Smrg
900de2362d3Smrg    r600_sq_setup(pScrn, &sq_conf);
901de2362d3Smrg
902de2362d3Smrg    /* set fake reloc for unused depth */
903de2362d3Smrg    BEGIN_BATCH(3 + 2);
904de2362d3Smrg    EREG(DB_DEPTH_INFO, 0);
905de2362d3Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
906de2362d3Smrg    END_BATCH();
907de2362d3Smrg
908de2362d3Smrg    BEGIN_BATCH(80);
909de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770) {
910de2362d3Smrg	EREG(TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) |
911de2362d3Smrg			       (28 << TD_FIFO_CREDIT_shift)));
912de2362d3Smrg	EREG(VC_ENHANCE, 0);
913de2362d3Smrg	EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
914de2362d3Smrg	EREG(DB_DEBUG, 0x82000000); /* ? */
915de2362d3Smrg	EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
916de2362d3Smrg				 (16 << DEPTH_FLUSH_shift) |
917de2362d3Smrg				 (0 << FORCE_SUMMARIZE_shift) |
918de2362d3Smrg				 (4 << DEPTH_PENDING_FREE_shift) |
919de2362d3Smrg				 (16 << DEPTH_CACHELINE_FREE_shift) |
920de2362d3Smrg				 0));
921de2362d3Smrg    } else {
922de2362d3Smrg	EREG(TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) |
923de2362d3Smrg			       (28 << TD_FIFO_CREDIT_shift)));
924de2362d3Smrg	EREG(VC_ENHANCE, 0);
925de2362d3Smrg	EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
926de2362d3Smrg	EREG(DB_DEBUG, 0);
927de2362d3Smrg	EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
928de2362d3Smrg				 (16 << DEPTH_FLUSH_shift) |
929de2362d3Smrg				 (0 << FORCE_SUMMARIZE_shift) |
930de2362d3Smrg				 (4 << DEPTH_PENDING_FREE_shift) |
931de2362d3Smrg				 (4 << DEPTH_CACHELINE_FREE_shift) |
932de2362d3Smrg				 0));
933de2362d3Smrg    }
934de2362d3Smrg
935de2362d3Smrg    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
936de2362d3Smrg    E32(0);
937de2362d3Smrg    E32(0);
938de2362d3Smrg
939de2362d3Smrg    PACK0(SQ_ESGS_RING_ITEMSIZE, 9);
940de2362d3Smrg    E32(0); // SQ_ESGS_RING_ITEMSIZE
941de2362d3Smrg    E32(0); // SQ_GSVS_RING_ITEMSIZE
942de2362d3Smrg    E32(0); // SQ_ESTMP_RING_ITEMSIZE
943de2362d3Smrg    E32(0); // SQ_GSTMP_RING_ITEMSIZE
944de2362d3Smrg    E32(0); // SQ_VSTMP_RING_ITEMSIZE
945de2362d3Smrg    E32(0); // SQ_PSTMP_RING_ITEMSIZE
946de2362d3Smrg    E32(0); // SQ_FBUF_RING_ITEMSIZE
947de2362d3Smrg    E32(0); // SQ_REDUC_RING_ITEMSIZE
948de2362d3Smrg    E32(0); // SQ_GS_VERT_ITEMSIZE
949de2362d3Smrg
950de2362d3Smrg    // DB
951de2362d3Smrg    EREG(DB_DEPTH_CONTROL,                    0);
952de2362d3Smrg    PACK0(DB_RENDER_CONTROL, 2);
953de2362d3Smrg    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
954de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770)
955de2362d3Smrg	E32(FORCE_SHADER_Z_ORDER_bit);
956de2362d3Smrg    else
957de2362d3Smrg	E32(0);
958de2362d3Smrg    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
959de2362d3Smrg						   (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
960de2362d3Smrg						   (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
961de2362d3Smrg						   (2 << ALPHA_TO_MASK_OFFSET3_shift)));
962de2362d3Smrg    EREG(DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
963de2362d3Smrg				 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
964de2362d3Smrg
965de2362d3Smrg    PACK0(DB_STENCIL_CLEAR, 2);
966de2362d3Smrg    E32(0); // DB_STENCIL_CLEAR
967de2362d3Smrg    E32(0); // DB_DEPTH_CLEAR
968de2362d3Smrg
969de2362d3Smrg    PACK0(DB_STENCILREFMASK, 3);
970de2362d3Smrg    E32(0); // DB_STENCILREFMASK
971de2362d3Smrg    E32(0); // DB_STENCILREFMASK_BF
972de2362d3Smrg    E32(0); // SX_ALPHA_REF
973de2362d3Smrg
974de2362d3Smrg    PACK0(CB_CLRCMP_CONTROL, 4);
975de2362d3Smrg    E32(1 << CLRCMP_FCN_SEL_shift);				// CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
976de2362d3Smrg    E32(0);							// CB_CLRCMP_SRC
977de2362d3Smrg    E32(0);							// CB_CLRCMP_DST
978de2362d3Smrg    E32(0);							// CB_CLRCMP_MSK
979de2362d3Smrg
980de2362d3Smrg    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
981de2362d3Smrg    EREG(R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
982de2362d3Smrg
983de2362d3Smrg    PACK0(SX_ALPHA_TEST_CONTROL, 5);
984de2362d3Smrg    E32(0); // SX_ALPHA_TEST_CONTROL
985de2362d3Smrg    E32(0x00000000); // CB_BLEND_RED
986de2362d3Smrg    E32(0x00000000); // CB_BLEND_GREEN
987de2362d3Smrg    E32(0x00000000); // CB_BLEND_BLUE
988de2362d3Smrg    E32(0x00000000); // CB_BLEND_ALPHA
989de2362d3Smrg
990de2362d3Smrg    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
991de2362d3Smrg						   (0 << WINDOW_Y_OFFSET_shift)));
992de2362d3Smrg
993de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770)
994de2362d3Smrg	EREG(R7xx_PA_SC_EDGERULE,             0x00000000);
995de2362d3Smrg    else
996de2362d3Smrg	EREG(R7xx_PA_SC_EDGERULE,             0xAAAAAAAA);
997de2362d3Smrg
998de2362d3Smrg    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
999de2362d3Smrg
1000de2362d3Smrg    END_BATCH();
1001de2362d3Smrg
1002de2362d3Smrg    /* clip boolean is set to always visible -> doesn't matter */
1003de2362d3Smrg    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1004de2362d3Smrg	r600_set_clip_rect(pScrn, i, 0, 0, 8192, 8192);
1005de2362d3Smrg
1006de2362d3Smrg    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1007de2362d3Smrg	r600_set_vport_scissor(pScrn, i, 0, 0, 8192, 8192);
1008de2362d3Smrg
1009de2362d3Smrg    BEGIN_BATCH(49);
1010de2362d3Smrg    PACK0(PA_SC_MPASS_PS_CNTL, 2);
1011de2362d3Smrg    E32(0);
1012de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770)
1013de2362d3Smrg	E32((WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
1014de2362d3Smrg    else
1015de2362d3Smrg	E32((FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
1016de2362d3Smrg		 0x00500000)); /* ? */
1017de2362d3Smrg
1018de2362d3Smrg    PACK0(PA_SC_LINE_CNTL, 9);
1019de2362d3Smrg    E32(0); // PA_SC_LINE_CNTL
1020de2362d3Smrg    E32(0); // PA_SC_AA_CONFIG
1021de2362d3Smrg    E32(((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL
1022de2362d3Smrg	     (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
1023de2362d3Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1024de2362d3Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1025de2362d3Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1026de2362d3Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1027de2362d3Smrg    E32(0);                                                 // PA_SC_AA_SAMPLE_LOCS_MCTX
1028de2362d3Smrg    E32(0);                                                 // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M
1029de2362d3Smrg
1030de2362d3Smrg    EREG(PA_SC_AA_MASK,                       0xFFFFFFFF);
1031de2362d3Smrg
1032de2362d3Smrg    PACK0(PA_CL_CLIP_CNTL, 5);
1033de2362d3Smrg    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1034de2362d3Smrg    E32(FACE_bit);         // PA_SU_SC_MODE_CNTL
1035de2362d3Smrg    E32(VTX_XY_FMT_bit);   // PA_CL_VTE_CNTL
1036de2362d3Smrg    E32(0);                // PA_CL_VS_OUT_CNTL
1037de2362d3Smrg    E32(0);                // PA_CL_NANINF_CNTL
1038de2362d3Smrg
1039de2362d3Smrg    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1040de2362d3Smrg    E32(0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL
1041de2362d3Smrg    E32(0); // PA_SU_POLY_OFFSET_CLAMP
1042de2362d3Smrg    E32(0); // PA_SU_POLY_OFFSET_FRONT_SCALE
1043de2362d3Smrg    E32(0); // PA_SU_POLY_OFFSET_FRONT_OFFSET
1044de2362d3Smrg    E32(0); // PA_SU_POLY_OFFSET_BACK_SCALE
1045de2362d3Smrg    E32(0); // PA_SU_POLY_OFFSET_BACK_OFFSET
1046de2362d3Smrg
1047de2362d3Smrg    // SPI
1048de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770)
1049de2362d3Smrg	EREG(R7xx_SPI_THREAD_GROUPING,        0);
1050de2362d3Smrg    else
1051de2362d3Smrg	EREG(R7xx_SPI_THREAD_GROUPING,        (1 << PS_GROUPING_shift));
1052de2362d3Smrg
1053de2362d3Smrg    /* default Interpolator setup */
1054de2362d3Smrg    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1055de2362d3Smrg			       (1 << SEMANTIC_1_shift)));
1056de2362d3Smrg    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1057de2362d3Smrg    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1058de2362d3Smrg    E32(((0    << SEMANTIC_shift)	|
1059de2362d3Smrg	     (0x01 << DEFAULT_VAL_shift)	|
1060de2362d3Smrg	     SEL_CENTROID_bit));
1061de2362d3Smrg    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1062de2362d3Smrg    E32(((1    << SEMANTIC_shift)	|
1063de2362d3Smrg	     (0x01 << DEFAULT_VAL_shift)	|
1064de2362d3Smrg	     SEL_CENTROID_bit));
1065de2362d3Smrg
1066de2362d3Smrg    PACK0(SPI_INPUT_Z, 4);
1067de2362d3Smrg    E32(0); // SPI_INPUT_Z
1068de2362d3Smrg    E32(0); // SPI_FOG_CNTL
1069de2362d3Smrg    E32(0); // SPI_FOG_FUNC_SCALE
1070de2362d3Smrg    E32(0); // SPI_FOG_FUNC_BIAS
1071de2362d3Smrg
1072de2362d3Smrg    END_BATCH();
1073de2362d3Smrg
1074de2362d3Smrg    // clear FS
1075de2362d3Smrg    fs_conf.bo = accel_state->shaders_bo;
1076de2362d3Smrg    r600_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1077de2362d3Smrg
1078de2362d3Smrg    // VGT
1079de2362d3Smrg    BEGIN_BATCH(46);
1080de2362d3Smrg    PACK0(VGT_MAX_VTX_INDX, 4);
1081de2362d3Smrg    E32(0xffffff); // VGT_MAX_VTX_INDX
1082de2362d3Smrg    E32(0); // VGT_MIN_VTX_INDX
1083de2362d3Smrg    E32(0); // VGT_INDX_OFFSET
1084de2362d3Smrg    E32(0); // VGT_MULTI_PRIM_IB_RESET_INDX
1085de2362d3Smrg
1086de2362d3Smrg    EREG(VGT_PRIMITIVEID_EN,                  0);
1087de2362d3Smrg    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1088de2362d3Smrg
1089de2362d3Smrg    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1090de2362d3Smrg    E32(0); // VGT_INSTANCE_STEP_RATE_0
1091de2362d3Smrg    E32(0); // VGT_INSTANCE_STEP_RATE_1
1092de2362d3Smrg
1093de2362d3Smrg    PACK0(PA_SU_POINT_SIZE, 17);
1094de2362d3Smrg    E32(0); // PA_SU_POINT_SIZE
1095de2362d3Smrg    E32(0); // PA_SU_POINT_MINMAX
1096de2362d3Smrg    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1097de2362d3Smrg    E32(0); // PA_SC_LINE_STIPPLE
1098de2362d3Smrg    E32(0); // VGT_OUTPUT_PATH_CNTL
1099de2362d3Smrg    E32(0); // VGT_HOS_CNTL
1100de2362d3Smrg    E32(0); // VGT_HOS_MAX_TESS_LEVEL
1101de2362d3Smrg    E32(0); // VGT_HOS_MIN_TESS_LEVEL
1102de2362d3Smrg    E32(0); // VGT_HOS_REUSE_DEPTH
1103de2362d3Smrg    E32(0); // VGT_GROUP_PRIM_TYPE
1104de2362d3Smrg    E32(0); // VGT_GROUP_FIRST_DECR
1105de2362d3Smrg    E32(0); // VGT_GROUP_DECR
1106de2362d3Smrg    E32(0); // VGT_GROUP_VECT_0_CNTL
1107de2362d3Smrg    E32(0); // VGT_GROUP_VECT_1_CNTL
1108de2362d3Smrg    E32(0); // VGT_GROUP_VECT_0_FMT_CNTL
1109de2362d3Smrg    E32(0); // VGT_GROUP_VECT_1_FMT_CNTL
1110de2362d3Smrg    E32(0); // VGT_GS_MODE
1111de2362d3Smrg
1112de2362d3Smrg    PACK0(VGT_STRMOUT_EN, 3);
1113de2362d3Smrg    E32(0); // VGT_STRMOUT_EN
1114de2362d3Smrg    E32(0); // VGT_REUSE_OFF
1115de2362d3Smrg    E32(0); // VGT_VTX_CNT_EN
1116de2362d3Smrg
1117de2362d3Smrg    EREG(VGT_STRMOUT_BUFFER_EN,               0);
1118de2362d3Smrg    EREG(SX_MISC,                             0);
1119de2362d3Smrg    END_BATCH();
1120de2362d3Smrg}
1121de2362d3Smrg
1122de2362d3Smrg
1123de2362d3Smrg/*
1124de2362d3Smrg * Commands
1125de2362d3Smrg */
1126de2362d3Smrg
1127de2362d3Smrgvoid
1128de2362d3Smrgr600_draw_immd(ScrnInfoPtr pScrn, draw_config_t *draw_conf, uint32_t *indices)
1129de2362d3Smrg{
1130de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1131de2362d3Smrg    uint32_t i, count;
1132de2362d3Smrg
1133de2362d3Smrg    // calculate num of packets
1134de2362d3Smrg    count = 2;
1135de2362d3Smrg    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
1136de2362d3Smrg	count += (draw_conf->num_indices + 1) / 2;
1137de2362d3Smrg    else
1138de2362d3Smrg	count += draw_conf->num_indices;
1139de2362d3Smrg
1140de2362d3Smrg    BEGIN_BATCH(8 + count);
1141de2362d3Smrg    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1142de2362d3Smrg    PACK3(IT_INDEX_TYPE, 1);
1143de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1144de2362d3Smrg    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1145de2362d3Smrg#else
1146de2362d3Smrg    E32(draw_conf->index_type);
1147de2362d3Smrg#endif
1148de2362d3Smrg    PACK3(IT_NUM_INSTANCES, 1);
1149de2362d3Smrg    E32(draw_conf->num_instances);
1150de2362d3Smrg
1151de2362d3Smrg    PACK3(IT_DRAW_INDEX_IMMD, count);
1152de2362d3Smrg    E32(draw_conf->num_indices);
1153de2362d3Smrg    E32(draw_conf->vgt_draw_initiator);
1154de2362d3Smrg
1155de2362d3Smrg    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
1156de2362d3Smrg	for (i = 0; i < draw_conf->num_indices; i += 2) {
1157de2362d3Smrg	    if ((i + 1) == draw_conf->num_indices)
1158de2362d3Smrg		E32(indices[i]);
1159de2362d3Smrg	    else
1160de2362d3Smrg		E32((indices[i] | (indices[i + 1] << 16)));
1161de2362d3Smrg	}
1162de2362d3Smrg    } else {
1163de2362d3Smrg	for (i = 0; i < draw_conf->num_indices; i++)
1164de2362d3Smrg	    E32(indices[i]);
1165de2362d3Smrg    }
1166de2362d3Smrg    END_BATCH();
1167de2362d3Smrg}
1168de2362d3Smrg
1169de2362d3Smrgvoid
1170de2362d3Smrgr600_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1171de2362d3Smrg{
1172de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1173de2362d3Smrg
1174de2362d3Smrg    BEGIN_BATCH(10);
1175de2362d3Smrg    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1176de2362d3Smrg    PACK3(IT_INDEX_TYPE, 1);
1177de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1178de2362d3Smrg    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1179de2362d3Smrg#else
1180de2362d3Smrg    E32(draw_conf->index_type);
1181de2362d3Smrg#endif
1182de2362d3Smrg    PACK3(IT_NUM_INSTANCES, 1);
1183de2362d3Smrg    E32(draw_conf->num_instances);
1184de2362d3Smrg    PACK3(IT_DRAW_INDEX_AUTO, 2);
1185de2362d3Smrg    E32(draw_conf->num_indices);
1186de2362d3Smrg    E32(draw_conf->vgt_draw_initiator);
1187de2362d3Smrg    END_BATCH();
1188de2362d3Smrg}
1189de2362d3Smrg
1190de2362d3Smrgvoid r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1191de2362d3Smrg{
1192de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1193de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1194de2362d3Smrg    draw_config_t   draw_conf;
1195de2362d3Smrg    vtx_resource_t  vtx_res;
1196de2362d3Smrg
1197de2362d3Smrg    if (accel_state->vbo.vb_start_op == -1)
1198de2362d3Smrg	return;
1199de2362d3Smrg
1200de2362d3Smrg    CLEAR (draw_conf);
1201de2362d3Smrg    CLEAR (vtx_res);
1202de2362d3Smrg
1203de2362d3Smrg    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1204de2362d3Smrg        R600IBDiscard(pScrn);
1205de2362d3Smrg	return;
1206de2362d3Smrg    }
1207de2362d3Smrg
1208de2362d3Smrg    /* Vertex buffer setup */
1209de2362d3Smrg    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1210de2362d3Smrg    vtx_res.id              = SQ_VTX_RESOURCE_vs;
1211de2362d3Smrg    vtx_res.vtx_size_dw     = vtx_size / 4;
1212de2362d3Smrg    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1213de2362d3Smrg    vtx_res.mem_req_size    = 1;
1214de2362d3Smrg    vtx_res.vb_addr         = accel_state->vbo.vb_start_op;
1215de2362d3Smrg    vtx_res.bo              = accel_state->vbo.vb_bo;
1216de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1217de2362d3Smrg    vtx_res.endian          = SQ_ENDIAN_8IN32;
1218de2362d3Smrg#endif
1219de2362d3Smrg    r600_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1220de2362d3Smrg
1221de2362d3Smrg    /* Draw */
1222de2362d3Smrg    draw_conf.prim_type          = DI_PT_RECTLIST;
1223de2362d3Smrg    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1224de2362d3Smrg    draw_conf.num_instances      = 1;
1225de2362d3Smrg    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1226de2362d3Smrg    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1227de2362d3Smrg
1228de2362d3Smrg    r600_draw_auto(pScrn, &draw_conf);
1229de2362d3Smrg
1230de2362d3Smrg    /* XXX drm should handle this in fence submit */
1231de2362d3Smrg    r600_wait_3d_idle_clean(pScrn);
1232de2362d3Smrg
1233de2362d3Smrg    /* sync dst surface */
1234de2362d3Smrg    r600_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1235de2362d3Smrg			     accel_state->dst_size, 0,
1236de2362d3Smrg			     accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1237de2362d3Smrg
1238de2362d3Smrg    accel_state->vbo.vb_start_op = -1;
1239de2362d3Smrg    accel_state->ib_reset_op = 0;
1240de2362d3Smrg
1241de2362d3Smrg}
1242de2362d3Smrg
1243