/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Alex Deucher <alexander.deucher@amd.com>
 *          Matthias Hopf <mhopf@suse.de>
 */
26de2362d3Smrg#ifdef HAVE_CONFIG_H
27de2362d3Smrg#include "config.h"
28de2362d3Smrg#endif
29de2362d3Smrg
30de2362d3Smrg#include "xf86.h"
31de2362d3Smrg
32de2362d3Smrg#include <errno.h>
33de2362d3Smrg
34de2362d3Smrg#include "radeon.h"
35de2362d3Smrg#include "r600_shader.h"
36de2362d3Smrg#include "radeon_reg.h"
37de2362d3Smrg#include "r600_reg.h"
38de2362d3Smrg#include "r600_state.h"
39de2362d3Smrg
40de2362d3Smrg#include "radeon_vbo.h"
41de2362d3Smrg#include "radeon_exa_shared.h"
42de2362d3Smrg
43de2362d3Smrgstatic const uint32_t R600_ROP[16] = {
44de2362d3Smrg    RADEON_ROP3_ZERO, /* GXclear        */
45de2362d3Smrg    RADEON_ROP3_DSa,  /* Gxand          */
46de2362d3Smrg    RADEON_ROP3_SDna, /* GXandReverse   */
47de2362d3Smrg    RADEON_ROP3_S,    /* GXcopy         */
48de2362d3Smrg    RADEON_ROP3_DSna, /* GXandInverted  */
49de2362d3Smrg    RADEON_ROP3_D,    /* GXnoop         */
50de2362d3Smrg    RADEON_ROP3_DSx,  /* GXxor          */
51de2362d3Smrg    RADEON_ROP3_DSo,  /* GXor           */
52de2362d3Smrg    RADEON_ROP3_DSon, /* GXnor          */
53de2362d3Smrg    RADEON_ROP3_DSxn, /* GXequiv        */
54de2362d3Smrg    RADEON_ROP3_Dn,   /* GXinvert       */
55de2362d3Smrg    RADEON_ROP3_SDno, /* GXorReverse    */
56de2362d3Smrg    RADEON_ROP3_Sn,   /* GXcopyInverted */
57de2362d3Smrg    RADEON_ROP3_DSno, /* GXorInverted   */
58de2362d3Smrg    RADEON_ROP3_DSan, /* GXnand         */
59de2362d3Smrg    RADEON_ROP3_ONE,  /* GXset          */
60de2362d3Smrg};
61de2362d3Smrg
/* We try to batch operations together under KMS,
   but it doesn't work yet without misrendering. */
#define KMS_MULTI_OP 1
65de2362d3Smrg
66de2362d3Smrg/* Flush the indirect buffer to the kernel for submission to the card */
6718781e08Smrgvoid R600CPFlushIndirect(ScrnInfoPtr pScrn)
68de2362d3Smrg{
6918781e08Smrg    radeon_cs_flush_indirect(pScrn);
70de2362d3Smrg}
71de2362d3Smrg
7218781e08Smrgvoid R600IBDiscard(ScrnInfoPtr pScrn)
73de2362d3Smrg{
7418781e08Smrg    radeon_ib_discard(pScrn);
75de2362d3Smrg}
76de2362d3Smrg
77de2362d3Smrgvoid
7818781e08Smrgr600_wait_3d_idle_clean(ScrnInfoPtr pScrn)
79de2362d3Smrg{
80de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
81de2362d3Smrg
82de2362d3Smrg    //flush caches, don't generate timestamp
83de2362d3Smrg    BEGIN_BATCH(5);
8418781e08Smrg    PACK3(IT_EVENT_WRITE, 1);
8518781e08Smrg    E32(CACHE_FLUSH_AND_INV_EVENT);
86de2362d3Smrg    // wait for 3D idle clean
8718781e08Smrg    EREG(WAIT_UNTIL,                          (WAIT_3D_IDLE_bit |
88de2362d3Smrg						   WAIT_3D_IDLECLEAN_bit));
89de2362d3Smrg    END_BATCH();
90de2362d3Smrg}
91de2362d3Smrg
92de2362d3Smrgvoid
9318781e08Smrgr600_wait_3d_idle(ScrnInfoPtr pScrn)
94de2362d3Smrg{
95de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
96de2362d3Smrg
97de2362d3Smrg    BEGIN_BATCH(3);
9818781e08Smrg    EREG(WAIT_UNTIL,                          WAIT_3D_IDLE_bit);
99de2362d3Smrg    END_BATCH();
100de2362d3Smrg}
101de2362d3Smrg
102de2362d3Smrgvoid
10318781e08Smrgr600_start_3d(ScrnInfoPtr pScrn)
104de2362d3Smrg{
105de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
106de2362d3Smrg
107de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770) {
108de2362d3Smrg	BEGIN_BATCH(5);
10918781e08Smrg	PACK3(IT_START_3D_CMDBUF, 1);
11018781e08Smrg	E32(0);
111de2362d3Smrg    } else
112de2362d3Smrg	BEGIN_BATCH(3);
113de2362d3Smrg
11418781e08Smrg    PACK3(IT_CONTEXT_CONTROL, 2);
11518781e08Smrg    E32(0x80000000);
11618781e08Smrg    E32(0x80000000);
117de2362d3Smrg    END_BATCH();
118de2362d3Smrg
119de2362d3Smrg}
120de2362d3Smrg
/*
 * Setup of functional groups
 */
124de2362d3Smrg
125de2362d3Smrg// asic stack/thread/gpr limits - need to query the drm
126de2362d3Smrgstatic void
12718781e08Smrgr600_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
128de2362d3Smrg{
129de2362d3Smrg    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
130de2362d3Smrg    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
131de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
132de2362d3Smrg
133de2362d3Smrg    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
134de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RV620) ||
135de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RS780) ||
136de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RS880) ||
137de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RV710))
138de2362d3Smrg	sq_config = 0;						// no VC
139de2362d3Smrg    else
140de2362d3Smrg	sq_config = VC_ENABLE_bit;
141de2362d3Smrg
142de2362d3Smrg    sq_config |= (DX9_CONSTS_bit |
143de2362d3Smrg		  ALU_INST_PREFER_VECTOR_bit |
144de2362d3Smrg		  (sq_conf->ps_prio << PS_PRIO_shift) |
145de2362d3Smrg		  (sq_conf->vs_prio << VS_PRIO_shift) |
146de2362d3Smrg		  (sq_conf->gs_prio << GS_PRIO_shift) |
147de2362d3Smrg		  (sq_conf->es_prio << ES_PRIO_shift));
148de2362d3Smrg
149de2362d3Smrg    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
150de2362d3Smrg			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
151de2362d3Smrg			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
152de2362d3Smrg    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
153de2362d3Smrg			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
154de2362d3Smrg
155de2362d3Smrg    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
156de2362d3Smrg			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
157de2362d3Smrg			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
158de2362d3Smrg			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
159de2362d3Smrg
160de2362d3Smrg    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
161de2362d3Smrg				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
162de2362d3Smrg
163de2362d3Smrg    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
164de2362d3Smrg				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
165de2362d3Smrg
166de2362d3Smrg    BEGIN_BATCH(8);
16718781e08Smrg    PACK0(SQ_CONFIG, 6);
16818781e08Smrg    E32(sq_config);
16918781e08Smrg    E32(sq_gpr_resource_mgmt_1);
17018781e08Smrg    E32(sq_gpr_resource_mgmt_2);
17118781e08Smrg    E32(sq_thread_resource_mgmt);
17218781e08Smrg    E32(sq_stack_resource_mgmt_1);
17318781e08Smrg    E32(sq_stack_resource_mgmt_2);
17418781e08Smrg    END_BATCH();
17518781e08Smrg}
17618781e08Smrg
17718781e08Smrgvoid r600_set_blend_color(ScrnInfoPtr pScrn, float *color)
17818781e08Smrg{
17918781e08Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
18018781e08Smrg
18118781e08Smrg    BEGIN_BATCH(2 + 4);
18218781e08Smrg    PACK0(CB_BLEND_RED, 4);
18318781e08Smrg    EFLOAT(color[0]); /* R */
18418781e08Smrg    EFLOAT(color[1]); /* G */
18518781e08Smrg    EFLOAT(color[2]); /* B */
18618781e08Smrg    EFLOAT(color[3]); /* A */
1877314432eSmrg    END_BATCH();
1887314432eSmrg}
1897314432eSmrg
19018781e08Smrg
191de2362d3Smrgvoid
19218781e08Smrgr600_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
193de2362d3Smrg{
194de2362d3Smrg    uint32_t cb_color_info, cb_color_control;
195de2362d3Smrg    unsigned pitch, slice, h, array_mode;
196de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
197de2362d3Smrg
198de2362d3Smrg
199de2362d3Smrg    if (cb_conf->surface) {
200de2362d3Smrg	switch (cb_conf->surface->level[0].mode) {
201de2362d3Smrg	case RADEON_SURF_MODE_1D:
202de2362d3Smrg		array_mode = 2;
203de2362d3Smrg		break;
204de2362d3Smrg	case RADEON_SURF_MODE_2D:
205de2362d3Smrg		array_mode = 4;
206de2362d3Smrg		break;
207de2362d3Smrg	default:
208de2362d3Smrg		array_mode = 0;
209de2362d3Smrg		break;
210de2362d3Smrg	}
211de2362d3Smrg	pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
212de2362d3Smrg	slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
213de2362d3Smrg    } else
214de2362d3Smrg    {
215de2362d3Smrg	array_mode = cb_conf->array_mode;
216de2362d3Smrg	pitch = (cb_conf->w / 8) - 1;
217de2362d3Smrg	h = RADEON_ALIGN(cb_conf->h, 8);
218de2362d3Smrg	slice = ((cb_conf->w * h) / 64) - 1;
219de2362d3Smrg    }
220de2362d3Smrg
221de2362d3Smrg    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
222de2362d3Smrg		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
223de2362d3Smrg		     (array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
224de2362d3Smrg		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
225de2362d3Smrg		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
226de2362d3Smrg		     (cb_conf->tile_mode   << CB_COLOR0_INFO__TILE_MODE_shift));
227de2362d3Smrg    if (cb_conf->read_size)
228de2362d3Smrg	cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
229de2362d3Smrg    if (cb_conf->blend_clamp)
230de2362d3Smrg	cb_color_info |= BLEND_CLAMP_bit;
231de2362d3Smrg    if (cb_conf->clear_color)
232de2362d3Smrg	cb_color_info |= CLEAR_COLOR_bit;
233de2362d3Smrg    if (cb_conf->blend_bypass)
234de2362d3Smrg	cb_color_info |= BLEND_BYPASS_bit;
235de2362d3Smrg    if (cb_conf->blend_float32)
236de2362d3Smrg	cb_color_info |= BLEND_FLOAT32_bit;
237de2362d3Smrg    if (cb_conf->simple_float)
238de2362d3Smrg	cb_color_info |= SIMPLE_FLOAT_bit;
239de2362d3Smrg    if (cb_conf->round_mode)
240de2362d3Smrg	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
241de2362d3Smrg    if (cb_conf->tile_compact)
242de2362d3Smrg	cb_color_info |= TILE_COMPACT_bit;
243de2362d3Smrg    if (cb_conf->source_format)
244de2362d3Smrg	cb_color_info |= SOURCE_FORMAT_bit;
245de2362d3Smrg
246de2362d3Smrg    BEGIN_BATCH(3 + 2);
24718781e08Smrg    EREG((CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
248de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
249de2362d3Smrg    END_BATCH();
250de2362d3Smrg
251de2362d3Smrg    // rv6xx workaround
252de2362d3Smrg    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
253de2362d3Smrg        (info->ChipFamily < CHIP_FAMILY_RV770)) {
254de2362d3Smrg        BEGIN_BATCH(2);
25518781e08Smrg        PACK3(IT_SURFACE_BASE_UPDATE, 1);
25618781e08Smrg        E32((2 << cb_conf->id));
257de2362d3Smrg        END_BATCH();
258de2362d3Smrg    }
259de2362d3Smrg    /* Set CMASK & TILE buffer to the offset of color buffer as
260de2362d3Smrg     * we don't use those this shouldn't cause any issue and we
261de2362d3Smrg     * then have a valid cmd stream
262de2362d3Smrg     */
263de2362d3Smrg    BEGIN_BATCH(3 + 2);
26418781e08Smrg    EREG((CB_COLOR0_TILE + (4 * cb_conf->id)), (0     >> 8));	// CMASK per-tile data base/256
265de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
266de2362d3Smrg    END_BATCH();
267de2362d3Smrg    BEGIN_BATCH(3 + 2);
26818781e08Smrg    EREG((CB_COLOR0_FRAG + (4 * cb_conf->id)), (0     >> 8));	// FMASK per-tile data base/256
269de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
270de2362d3Smrg    END_BATCH();
271de2362d3Smrg    BEGIN_BATCH(9);
272de2362d3Smrg    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
27318781e08Smrg    EREG((CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift)	|
274de2362d3Smrg						    (slice << SLICE_TILE_MAX_shift)));
27518781e08Smrg    EREG((CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0    << SLICE_START_shift)		|
276de2362d3Smrg						    (0    << SLICE_MAX_shift)));
27718781e08Smrg    EREG((CB_COLOR0_MASK + (4 * cb_conf->id)), ((0    << CMASK_BLOCK_MAX_shift)	|
278de2362d3Smrg						    (0    << FMASK_TILE_MAX_shift)));
279de2362d3Smrg    END_BATCH();
280de2362d3Smrg
281de2362d3Smrg    BEGIN_BATCH(3 + 2);
28218781e08Smrg    EREG((CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
283de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
284de2362d3Smrg    END_BATCH();
285de2362d3Smrg
286de2362d3Smrg    BEGIN_BATCH(9);
28718781e08Smrg    EREG(CB_TARGET_MASK,          (cb_conf->pmask << TARGET0_ENABLE_shift));
288de2362d3Smrg    cb_color_control = R600_ROP[cb_conf->rop] |
289de2362d3Smrg	(cb_conf->blend_enable << TARGET_BLEND_ENABLE_shift);
290de2362d3Smrg    if (info->ChipFamily == CHIP_FAMILY_R600) {
291de2362d3Smrg	/* no per-MRT blend on R600 */
29218781e08Smrg	EREG(CB_COLOR_CONTROL,    cb_color_control);
29318781e08Smrg	EREG(CB_BLEND_CONTROL,    cb_conf->blendcntl);
294de2362d3Smrg    } else {
295de2362d3Smrg	if (cb_conf->blend_enable)
296de2362d3Smrg	    cb_color_control |= PER_MRT_BLEND_bit;
29718781e08Smrg	EREG(CB_COLOR_CONTROL,    cb_color_control);
29818781e08Smrg	EREG(CB_BLEND0_CONTROL,   cb_conf->blendcntl);
299de2362d3Smrg    }
300de2362d3Smrg    END_BATCH();
301de2362d3Smrg}
302de2362d3Smrg
303de2362d3Smrgstatic void
30418781e08Smrgr600_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
305de2362d3Smrg			 uint32_t size, uint64_t mc_addr,
306de2362d3Smrg			 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
307de2362d3Smrg{
308de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
309de2362d3Smrg    uint32_t cp_coher_size;
310de2362d3Smrg    if (size == 0xffffffff)
311de2362d3Smrg	cp_coher_size = 0xffffffff;
312de2362d3Smrg    else
313de2362d3Smrg	cp_coher_size = ((size + 255) >> 8);
314de2362d3Smrg
315de2362d3Smrg    BEGIN_BATCH(5 + 2);
31618781e08Smrg    PACK3(IT_SURFACE_SYNC, 4);
31718781e08Smrg    E32(sync_type);
31818781e08Smrg    E32(cp_coher_size);
31918781e08Smrg    E32((mc_addr >> 8));
32018781e08Smrg    E32(10); /* poll interval */
321de2362d3Smrg    RELOC_BATCH(bo, rdomains, wdomain);
322de2362d3Smrg    END_BATCH();
323de2362d3Smrg}
324de2362d3Smrg
325de2362d3Smrg/* inserts a wait for vline in the command stream */
326de2362d3Smrgvoid
32718781e08Smrgr600_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
328de2362d3Smrg			xf86CrtcPtr crtc, int start, int stop)
329de2362d3Smrg{
330de2362d3Smrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
33118781e08Smrg    drmmode_crtc_private_ptr drmmode_crtc;
332de2362d3Smrg
333de2362d3Smrg    if (!crtc)
334de2362d3Smrg        return;
335de2362d3Smrg
336de2362d3Smrg    if (!crtc->enabled)
337de2362d3Smrg        return;
338de2362d3Smrg
33918781e08Smrg    if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
34018781e08Smrg        return;
341de2362d3Smrg
342de2362d3Smrg    start = max(start, crtc->y);
343de2362d3Smrg    stop = min(stop, crtc->y + crtc->mode.VDisplay);
344de2362d3Smrg
345de2362d3Smrg    if (start >= stop)
346de2362d3Smrg        return;
347de2362d3Smrg
34818781e08Smrg    drmmode_crtc = crtc->driver_private;
34918781e08Smrg
35018781e08Smrg    BEGIN_BATCH(11);
35118781e08Smrg    /* set the VLINE range */
35218781e08Smrg    EREG(AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */
35318781e08Smrg	 (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
35418781e08Smrg	 (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
35518781e08Smrg
35618781e08Smrg    /* tell the CP to poll the VLINE state register */
35718781e08Smrg    PACK3(IT_WAIT_REG_MEM, 6);
35818781e08Smrg    E32(IT_WAIT_REG | IT_WAIT_EQ);
35918781e08Smrg    E32(IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS));
36018781e08Smrg    E32(0);
36118781e08Smrg    E32(0);                          // Ref value
36218781e08Smrg    E32(AVIVO_D1MODE_VLINE_STAT);    // Mask
36318781e08Smrg    E32(10);                         // Wait interval
36418781e08Smrg    /* add crtc reloc */
36518781e08Smrg    PACK3(IT_NOP, 1);
36618781e08Smrg    E32(drmmode_crtc->mode_crtc->crtc_id);
36718781e08Smrg    END_BATCH();
368de2362d3Smrg}
369de2362d3Smrg
370de2362d3Smrgvoid
37118781e08Smrgr600_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
372de2362d3Smrg{
373de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
374de2362d3Smrg
375de2362d3Smrg    BEGIN_BATCH(8);
376de2362d3Smrg    /* Interpolator setup */
37718781e08Smrg    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
37818781e08Smrg    PACK0(SPI_PS_IN_CONTROL_0, 3);
37918781e08Smrg    E32((num_interp << NUM_INTERP_shift));
38018781e08Smrg    E32(0);
38118781e08Smrg    E32(0);
382de2362d3Smrg    END_BATCH();
383de2362d3Smrg}
384de2362d3Smrg
385de2362d3Smrgvoid
38618781e08Smrgr600_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
387de2362d3Smrg{
388de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
389de2362d3Smrg    uint32_t sq_pgm_resources;
390de2362d3Smrg
391de2362d3Smrg    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
392de2362d3Smrg			(fs_conf->stack_size << STACK_SIZE_shift));
393de2362d3Smrg
394de2362d3Smrg    if (fs_conf->dx10_clamp)
395de2362d3Smrg	sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;
396de2362d3Smrg
397de2362d3Smrg    BEGIN_BATCH(3 + 2);
39818781e08Smrg    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
399de2362d3Smrg    RELOC_BATCH(fs_conf->bo, domain, 0);
400de2362d3Smrg    END_BATCH();
401de2362d3Smrg
402de2362d3Smrg    BEGIN_BATCH(6);
40318781e08Smrg    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
40418781e08Smrg    EREG(SQ_PGM_CF_OFFSET_FS, 0);
405de2362d3Smrg    END_BATCH();
406de2362d3Smrg}
407de2362d3Smrg
408de2362d3Smrgvoid
40918781e08Smrgr600_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
410de2362d3Smrg{
411de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
412de2362d3Smrg    uint32_t sq_pgm_resources;
413de2362d3Smrg
414de2362d3Smrg    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
415de2362d3Smrg			(vs_conf->stack_size << STACK_SIZE_shift));
416de2362d3Smrg
417de2362d3Smrg    if (vs_conf->dx10_clamp)
418de2362d3Smrg	sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
419de2362d3Smrg    if (vs_conf->fetch_cache_lines)
420de2362d3Smrg	sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
421de2362d3Smrg    if (vs_conf->uncached_first_inst)
422de2362d3Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
423de2362d3Smrg
424de2362d3Smrg    /* flush SQ cache */
42518781e08Smrg    r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
426de2362d3Smrg			     vs_conf->shader_size, vs_conf->shader_addr,
427de2362d3Smrg			     vs_conf->bo, domain, 0);
428de2362d3Smrg
429de2362d3Smrg    BEGIN_BATCH(3 + 2);
43018781e08Smrg    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
431de2362d3Smrg    RELOC_BATCH(vs_conf->bo, domain, 0);
432de2362d3Smrg    END_BATCH();
433de2362d3Smrg
434de2362d3Smrg    BEGIN_BATCH(6);
43518781e08Smrg    EREG(SQ_PGM_RESOURCES_VS, sq_pgm_resources);
43618781e08Smrg    EREG(SQ_PGM_CF_OFFSET_VS, 0);
437de2362d3Smrg    END_BATCH();
438de2362d3Smrg}
439de2362d3Smrg
440de2362d3Smrgvoid
44118781e08Smrgr600_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
442de2362d3Smrg{
443de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
444de2362d3Smrg    uint32_t sq_pgm_resources;
445de2362d3Smrg
446de2362d3Smrg    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
447de2362d3Smrg			(ps_conf->stack_size << STACK_SIZE_shift));
448de2362d3Smrg
449de2362d3Smrg    if (ps_conf->dx10_clamp)
450de2362d3Smrg	sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
451de2362d3Smrg    if (ps_conf->fetch_cache_lines)
452de2362d3Smrg	sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
453de2362d3Smrg    if (ps_conf->uncached_first_inst)
454de2362d3Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
455de2362d3Smrg    if (ps_conf->clamp_consts)
456de2362d3Smrg	sq_pgm_resources |= CLAMP_CONSTS_bit;
457de2362d3Smrg
458de2362d3Smrg    /* flush SQ cache */
45918781e08Smrg    r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
460de2362d3Smrg			     ps_conf->shader_size, ps_conf->shader_addr,
461de2362d3Smrg			     ps_conf->bo, domain, 0);
462de2362d3Smrg
463de2362d3Smrg    BEGIN_BATCH(3 + 2);
46418781e08Smrg    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
465de2362d3Smrg    RELOC_BATCH(ps_conf->bo, domain, 0);
466de2362d3Smrg    END_BATCH();
467de2362d3Smrg
468de2362d3Smrg    BEGIN_BATCH(9);
46918781e08Smrg    EREG(SQ_PGM_RESOURCES_PS, sq_pgm_resources);
47018781e08Smrg    EREG(SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
47118781e08Smrg    EREG(SQ_PGM_CF_OFFSET_PS, 0);
472de2362d3Smrg    END_BATCH();
473de2362d3Smrg}
474de2362d3Smrg
475de2362d3Smrgvoid
47618781e08Smrgr600_set_alu_consts(ScrnInfoPtr pScrn, int offset, int count, float *const_buf)
477de2362d3Smrg{
478de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
479de2362d3Smrg    int i;
480de2362d3Smrg    const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2);
481de2362d3Smrg
482de2362d3Smrg    BEGIN_BATCH(2 + countreg);
48318781e08Smrg    PACK0(SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg);
484de2362d3Smrg    for (i = 0; i < countreg; i++)
48518781e08Smrg	EFLOAT(const_buf[i]);
486de2362d3Smrg    END_BATCH();
487de2362d3Smrg}
488de2362d3Smrg
489de2362d3Smrgvoid
49018781e08Smrgr600_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
491de2362d3Smrg{
492de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
493de2362d3Smrg    /* bool register order is: ps, vs, gs; one register each
494de2362d3Smrg     * 1 bits per bool; 32 bools each for ps, vs, gs.
495de2362d3Smrg     */
496de2362d3Smrg    BEGIN_BATCH(3);
49718781e08Smrg    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
498de2362d3Smrg    END_BATCH();
499de2362d3Smrg}
500de2362d3Smrg
501de2362d3Smrgstatic void
50218781e08Smrgr600_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
503de2362d3Smrg{
504de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
505de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
506de2362d3Smrg    uint32_t sq_vtx_constant_word2;
507de2362d3Smrg
508de2362d3Smrg    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
509de2362d3Smrg			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
510de2362d3Smrg			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
511de2362d3Smrg			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
512de2362d3Smrg			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
513de2362d3Smrg    if (res->clamp_x)
514de2362d3Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
515de2362d3Smrg
516de2362d3Smrg    if (res->format_comp_all)
517de2362d3Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
518de2362d3Smrg
519de2362d3Smrg    if (res->srf_mode_all)
520de2362d3Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
521de2362d3Smrg
522de2362d3Smrg    /* flush vertex cache */
523de2362d3Smrg    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
524de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RV620) ||
525de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RS780) ||
526de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RS880) ||
527de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_RV710))
52818781e08Smrg	r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
52918781e08Smrg				 accel_state->vbo.vb_offset, 0,
530de2362d3Smrg				 res->bo,
531de2362d3Smrg				 domain, 0);
532de2362d3Smrg    else
53318781e08Smrg	r600_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
53418781e08Smrg				 accel_state->vbo.vb_offset, 0,
535de2362d3Smrg				 res->bo,
536de2362d3Smrg				 domain, 0);
537de2362d3Smrg
538de2362d3Smrg    BEGIN_BATCH(9 + 2);
53918781e08Smrg    PACK0(SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
54018781e08Smrg    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
54118781e08Smrg    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
54218781e08Smrg    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
54318781e08Smrg    E32(res->mem_req_size << MEM_REQUEST_SIZE_shift);		// 3: MEM_REQUEST_SIZE ?!?
54418781e08Smrg    E32(0);							// 4: n/a
54518781e08Smrg    E32(0);							// 5: n/a
54618781e08Smrg    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift);	// 6: TYPE
547de2362d3Smrg    RELOC_BATCH(res->bo, domain, 0);
548de2362d3Smrg    END_BATCH();
549de2362d3Smrg}
550de2362d3Smrg
551de2362d3Smrgvoid
55218781e08Smrgr600_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
553de2362d3Smrg{
554de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
555de2362d3Smrg    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
556de2362d3Smrg    uint32_t sq_tex_resource_word5, sq_tex_resource_word6;
557de2362d3Smrg    uint32_t array_mode, pitch;
558de2362d3Smrg
559de2362d3Smrg    if (tex_res->surface) {
560de2362d3Smrg	switch (tex_res->surface->level[0].mode) {
561de2362d3Smrg	case RADEON_SURF_MODE_1D:
562de2362d3Smrg		array_mode = 2;
563de2362d3Smrg		break;
564de2362d3Smrg	case RADEON_SURF_MODE_2D:
565de2362d3Smrg		array_mode = 4;
566de2362d3Smrg		break;
567de2362d3Smrg	default:
568de2362d3Smrg		array_mode = 0;
569de2362d3Smrg		break;
570de2362d3Smrg	}
571de2362d3Smrg	pitch = tex_res->surface->level[0].nblk_x >> 3;
572de2362d3Smrg    } else
573de2362d3Smrg    {
574de2362d3Smrg	array_mode = tex_res->tile_mode;
575de2362d3Smrg	pitch = (tex_res->pitch + 7) >> 3;
576de2362d3Smrg    }
577de2362d3Smrg
578de2362d3Smrg    sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
579de2362d3Smrg		     (array_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));
580de2362d3Smrg
581de2362d3Smrg    if (tex_res->w)
582de2362d3Smrg	sq_tex_resource_word0 |= (((pitch - 1) << PITCH_shift) |
583de2362d3Smrg				  ((tex_res->w - 1) << TEX_WIDTH_shift));
584de2362d3Smrg
585de2362d3Smrg    if (tex_res->tile_type)
586de2362d3Smrg	sq_tex_resource_word0 |= TILE_TYPE_bit;
587de2362d3Smrg
588de2362d3Smrg    sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);
589de2362d3Smrg
590de2362d3Smrg    if (tex_res->h)
591de2362d3Smrg	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
592de2362d3Smrg    if (tex_res->depth)
593de2362d3Smrg	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
594de2362d3Smrg
595de2362d3Smrg    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
596de2362d3Smrg			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
597de2362d3Smrg			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
598de2362d3Smrg			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
599de2362d3Smrg			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
600de2362d3Smrg			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
601de2362d3Smrg			     (tex_res->request_size << REQUEST_SIZE_shift) |
602de2362d3Smrg			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
603de2362d3Smrg			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
604de2362d3Smrg			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
605de2362d3Smrg			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
606de2362d3Smrg			     (tex_res->base_level << BASE_LEVEL_shift));
607de2362d3Smrg
608de2362d3Smrg    if (tex_res->srf_mode_all)
609de2362d3Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
610de2362d3Smrg    if (tex_res->force_degamma)
611de2362d3Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
612de2362d3Smrg
613de2362d3Smrg    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
614de2362d3Smrg			     (tex_res->base_array << BASE_ARRAY_shift) |
615de2362d3Smrg			     (tex_res->last_array << LAST_ARRAY_shift));
616de2362d3Smrg
617de2362d3Smrg    sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
618de2362d3Smrg			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
619de2362d3Smrg			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));
620de2362d3Smrg
621de2362d3Smrg    if (tex_res->interlaced)
622de2362d3Smrg	sq_tex_resource_word6 |= INTERLACED_bit;
623de2362d3Smrg
624de2362d3Smrg    /* flush texture cache */
62518781e08Smrg    r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
626de2362d3Smrg			     tex_res->size, tex_res->base,
627de2362d3Smrg			     tex_res->bo, domain, 0);
628de2362d3Smrg
629de2362d3Smrg    BEGIN_BATCH(9 + 4);
63018781e08Smrg    PACK0(SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
63118781e08Smrg    E32(sq_tex_resource_word0);
63218781e08Smrg    E32(sq_tex_resource_word1);
63318781e08Smrg    E32(((tex_res->base) >> 8));
63418781e08Smrg    E32(((tex_res->mip_base) >> 8));
63518781e08Smrg    E32(sq_tex_resource_word4);
63618781e08Smrg    E32(sq_tex_resource_word5);
63718781e08Smrg    E32(sq_tex_resource_word6);
638de2362d3Smrg    RELOC_BATCH(tex_res->bo, domain, 0);
639de2362d3Smrg    RELOC_BATCH(tex_res->mip_bo, domain, 0);
640de2362d3Smrg    END_BATCH();
641de2362d3Smrg}
642de2362d3Smrg
643de2362d3Smrgvoid
64418781e08Smrgr600_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
645de2362d3Smrg{
646de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
647de2362d3Smrg    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
648de2362d3Smrg
649de2362d3Smrg    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
650de2362d3Smrg			    (s->clamp_y       << CLAMP_Y_shift)					|
651de2362d3Smrg			    (s->clamp_z       << CLAMP_Z_shift)					|
652de2362d3Smrg			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
653de2362d3Smrg			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
654de2362d3Smrg			    (s->z_filter      << Z_FILTER_shift)	|
655de2362d3Smrg			    (s->mip_filter    << MIP_FILTER_shift)				|
656de2362d3Smrg			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
657de2362d3Smrg			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
658de2362d3Smrg			    (s->chroma_key    << CHROMA_KEY_shift));
659de2362d3Smrg    if (s->point_sampling_clamp)
660de2362d3Smrg	sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
661de2362d3Smrg    if (s->tex_array_override)
662de2362d3Smrg	sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
663de2362d3Smrg    if (s->lod_uses_minor_axis)
664de2362d3Smrg	sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;
665de2362d3Smrg
666de2362d3Smrg    sq_tex_sampler_word1 = ((s->min_lod       << MIN_LOD_shift)					|
667de2362d3Smrg			    (s->max_lod       << MAX_LOD_shift)					|
668de2362d3Smrg			    (s->lod_bias      << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));
669de2362d3Smrg
670de2362d3Smrg    sq_tex_sampler_word2 = ((s->lod_bias2     << LOD_BIAS_SEC_shift)	|
671de2362d3Smrg			    (s->perf_mip      << PERF_MIP_shift)	|
672de2362d3Smrg			    (s->perf_z        << PERF_Z_shift));
673de2362d3Smrg    if (s->mc_coord_truncate)
674de2362d3Smrg	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
675de2362d3Smrg    if (s->force_degamma)
676de2362d3Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
677de2362d3Smrg    if (s->high_precision_filter)
678de2362d3Smrg	sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
679de2362d3Smrg    if (s->fetch_4)
680de2362d3Smrg	sq_tex_sampler_word2 |= FETCH_4_bit;
681de2362d3Smrg    if (s->sample_is_pcf)
682de2362d3Smrg	sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
683de2362d3Smrg    if (s->type)
684de2362d3Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
685de2362d3Smrg
686de2362d3Smrg    BEGIN_BATCH(5);
68718781e08Smrg    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
68818781e08Smrg    E32(sq_tex_sampler_word0);
68918781e08Smrg    E32(sq_tex_sampler_word1);
69018781e08Smrg    E32(sq_tex_sampler_word2);
691de2362d3Smrg    END_BATCH();
692de2362d3Smrg}
693de2362d3Smrg
694de2362d3Smrg//XXX deal with clip offsets in clip setup
695de2362d3Smrgvoid
69618781e08Smrgr600_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
697de2362d3Smrg{
698de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
699de2362d3Smrg
700de2362d3Smrg    BEGIN_BATCH(4);
70118781e08Smrg    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
70218781e08Smrg    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
703de2362d3Smrg	     (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
70418781e08Smrg    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
705de2362d3Smrg	     (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
706de2362d3Smrg    END_BATCH();
707de2362d3Smrg}
708de2362d3Smrg
709de2362d3Smrgvoid
71018781e08Smrgr600_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
711de2362d3Smrg{
712de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
713de2362d3Smrg
714de2362d3Smrg    BEGIN_BATCH(4);
71518781e08Smrg    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
71618781e08Smrg    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
717de2362d3Smrg	     (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
718de2362d3Smrg	     WINDOW_OFFSET_DISABLE_bit));
71918781e08Smrg    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
720de2362d3Smrg	     (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
721de2362d3Smrg    END_BATCH();
722de2362d3Smrg}
723de2362d3Smrg
724de2362d3Smrgvoid
72518781e08Smrgr600_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
726de2362d3Smrg{
727de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
728de2362d3Smrg
729de2362d3Smrg    BEGIN_BATCH(4);
73018781e08Smrg    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
73118781e08Smrg    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
732de2362d3Smrg	     (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
733de2362d3Smrg	     WINDOW_OFFSET_DISABLE_bit));
73418781e08Smrg    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
735de2362d3Smrg	     (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
736de2362d3Smrg    END_BATCH();
737de2362d3Smrg}
738de2362d3Smrg
739de2362d3Smrgvoid
74018781e08Smrgr600_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
741de2362d3Smrg{
742de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
743de2362d3Smrg
744de2362d3Smrg    BEGIN_BATCH(4);
74518781e08Smrg    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
74618781e08Smrg    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
747de2362d3Smrg	     (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
748de2362d3Smrg	     WINDOW_OFFSET_DISABLE_bit));
74918781e08Smrg    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
750de2362d3Smrg	      (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
751de2362d3Smrg    END_BATCH();
752de2362d3Smrg}
753de2362d3Smrg
754de2362d3Smrgvoid
75518781e08Smrgr600_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
756de2362d3Smrg{
757de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
758de2362d3Smrg
759de2362d3Smrg    BEGIN_BATCH(4);
76018781e08Smrg    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
76118781e08Smrg    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
762de2362d3Smrg	     (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
76318781e08Smrg    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
764de2362d3Smrg	     (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
765de2362d3Smrg    END_BATCH();
766de2362d3Smrg}
767de2362d3Smrg
768de2362d3Smrg/*
769de2362d3Smrg * Setup of default state
770de2362d3Smrg */
771de2362d3Smrg
/*
 * r600_set_default_state - emit the baseline 3D register state.
 *
 * Returns immediately when accel_state->XInited3D is already set; otherwise
 * sets that flag, calls r600_start_3d(), fills an sq_config_t with limits
 * chosen by info->ChipFamily and hands it to r600_sq_setup(), then writes
 * default values for the DB, CB, PA, SPI and VGT registers listed below and
 * passes a zeroed shader_config_t (with only .bo set) to r600_fs_setup().
 *
 * NOTE(review): each BEGIN_BATCH(n) presumably has to match the number of
 * dwords emitted before the corresponding END_BATCH() - confirm against the
 * BEGIN_BATCH/EREG/PACK0/E32 definitions before adding or removing writes.
 */
772de2362d3Smrgvoid
77318781e08Smrgr600_set_default_state(ScrnInfoPtr pScrn)
774de2362d3Smrg{
775de2362d3Smrg    tex_resource_t tex_res;
776de2362d3Smrg    shader_config_t fs_conf;
777de2362d3Smrg    sq_config_t sq_conf;
778de2362d3Smrg    int i;
779de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
780de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
781de2362d3Smrg
    /* Default state is emitted only once per XInited3D reset. */
782de2362d3Smrg    if (accel_state->XInited3D)
783de2362d3Smrg	return;
784de2362d3Smrg
    /*
     * tex_res is zeroed here but not referenced again in this function.
     * NOTE(review): sq_conf is not zeroed; confirm that the assignments
     * below cover every field of sq_config_t.
     */
785de2362d3Smrg    memset(&tex_res, 0, sizeof(tex_resource_t));
786de2362d3Smrg    memset(&fs_conf, 0, sizeof(shader_config_t));
787de2362d3Smrg
788de2362d3Smrg    accel_state->XInited3D = TRUE;
789de2362d3Smrg
79018781e08Smrg    r600_start_3d(pScrn);
791de2362d3Smrg
792de2362d3Smrg    // SQ
793de2362d3Smrg    sq_conf.ps_prio = 0;
794de2362d3Smrg    sq_conf.vs_prio = 1;
795de2362d3Smrg    sq_conf.gs_prio = 2;
796de2362d3Smrg    sq_conf.es_prio = 3;
797de2362d3Smrg    // need to set stack/thread/gpr limits based on the asic
798de2362d3Smrg    // for now just set them low enough so any card will work
799de2362d3Smrg    // see r600_cp.c in the drm
800de2362d3Smrg    switch (info->ChipFamily) {
801de2362d3Smrg    case CHIP_FAMILY_R600:
802de2362d3Smrg	sq_conf.num_ps_gprs = 192;
803de2362d3Smrg	sq_conf.num_vs_gprs = 56;
804de2362d3Smrg	sq_conf.num_temp_gprs = 4;
805de2362d3Smrg	sq_conf.num_gs_gprs = 0;
806de2362d3Smrg	sq_conf.num_es_gprs = 0;
807de2362d3Smrg	sq_conf.num_ps_threads = 136;
808de2362d3Smrg	sq_conf.num_vs_threads = 48;
809de2362d3Smrg	sq_conf.num_gs_threads = 4;
810de2362d3Smrg	sq_conf.num_es_threads = 4;
811de2362d3Smrg	sq_conf.num_ps_stack_entries = 128;
812de2362d3Smrg	sq_conf.num_vs_stack_entries = 128;
813de2362d3Smrg	sq_conf.num_gs_stack_entries = 0;
814de2362d3Smrg	sq_conf.num_es_stack_entries = 0;
815de2362d3Smrg	break;
816de2362d3Smrg    case CHIP_FAMILY_RV630:
817de2362d3Smrg    case CHIP_FAMILY_RV635:
818de2362d3Smrg	sq_conf.num_ps_gprs = 84;
819de2362d3Smrg	sq_conf.num_vs_gprs = 36;
820de2362d3Smrg	sq_conf.num_temp_gprs = 4;
821de2362d3Smrg	sq_conf.num_gs_gprs = 0;
822de2362d3Smrg	sq_conf.num_es_gprs = 0;
823de2362d3Smrg	sq_conf.num_ps_threads = 144;
824de2362d3Smrg	sq_conf.num_vs_threads = 40;
825de2362d3Smrg	sq_conf.num_gs_threads = 4;
826de2362d3Smrg	sq_conf.num_es_threads = 4;
827de2362d3Smrg	sq_conf.num_ps_stack_entries = 40;
828de2362d3Smrg	sq_conf.num_vs_stack_entries = 40;
829de2362d3Smrg	sq_conf.num_gs_stack_entries = 32;
830de2362d3Smrg	sq_conf.num_es_stack_entries = 16;
831de2362d3Smrg	break;
    /*
     * The default label sits in the middle of the switch: any family without
     * its own case gets the same limits as RV610/RV620/RS780/RS880.  The
     * case labels that follow are still matched normally.
     */
832de2362d3Smrg    case CHIP_FAMILY_RV610:
833de2362d3Smrg    case CHIP_FAMILY_RV620:
834de2362d3Smrg    case CHIP_FAMILY_RS780:
835de2362d3Smrg    case CHIP_FAMILY_RS880:
836de2362d3Smrg    default:
837de2362d3Smrg	sq_conf.num_ps_gprs = 84;
838de2362d3Smrg	sq_conf.num_vs_gprs = 36;
839de2362d3Smrg	sq_conf.num_temp_gprs = 4;
840de2362d3Smrg	sq_conf.num_gs_gprs = 0;
841de2362d3Smrg	sq_conf.num_es_gprs = 0;
842de2362d3Smrg	sq_conf.num_ps_threads = 136;
843de2362d3Smrg	sq_conf.num_vs_threads = 48;
844de2362d3Smrg	sq_conf.num_gs_threads = 4;
845de2362d3Smrg	sq_conf.num_es_threads = 4;
846de2362d3Smrg	sq_conf.num_ps_stack_entries = 40;
847de2362d3Smrg	sq_conf.num_vs_stack_entries = 40;
848de2362d3Smrg	sq_conf.num_gs_stack_entries = 32;
849de2362d3Smrg	sq_conf.num_es_stack_entries = 16;
850de2362d3Smrg	break;
851de2362d3Smrg    case CHIP_FAMILY_RV670:
852de2362d3Smrg	sq_conf.num_ps_gprs = 144;
853de2362d3Smrg	sq_conf.num_vs_gprs = 40;
854de2362d3Smrg	sq_conf.num_temp_gprs = 4;
855de2362d3Smrg	sq_conf.num_gs_gprs = 0;
856de2362d3Smrg	sq_conf.num_es_gprs = 0;
857de2362d3Smrg	sq_conf.num_ps_threads = 136;
858de2362d3Smrg	sq_conf.num_vs_threads = 48;
859de2362d3Smrg	sq_conf.num_gs_threads = 4;
860de2362d3Smrg	sq_conf.num_es_threads = 4;
861de2362d3Smrg	sq_conf.num_ps_stack_entries = 40;
862de2362d3Smrg	sq_conf.num_vs_stack_entries = 40;
863de2362d3Smrg	sq_conf.num_gs_stack_entries = 32;
864de2362d3Smrg	sq_conf.num_es_stack_entries = 16;
865de2362d3Smrg	break;
866de2362d3Smrg    case CHIP_FAMILY_RV770:
867de2362d3Smrg	sq_conf.num_ps_gprs = 192;
868de2362d3Smrg	sq_conf.num_vs_gprs = 56;
869de2362d3Smrg	sq_conf.num_temp_gprs = 4;
870de2362d3Smrg	sq_conf.num_gs_gprs = 0;
871de2362d3Smrg	sq_conf.num_es_gprs = 0;
872de2362d3Smrg	sq_conf.num_ps_threads = 188;
873de2362d3Smrg	sq_conf.num_vs_threads = 60;
874de2362d3Smrg	sq_conf.num_gs_threads = 0;
875de2362d3Smrg	sq_conf.num_es_threads = 0;
876de2362d3Smrg	sq_conf.num_ps_stack_entries = 256;
877de2362d3Smrg	sq_conf.num_vs_stack_entries = 256;
878de2362d3Smrg	sq_conf.num_gs_stack_entries = 0;
879de2362d3Smrg	sq_conf.num_es_stack_entries = 0;
880de2362d3Smrg	break;
881de2362d3Smrg    case CHIP_FAMILY_RV730:
882de2362d3Smrg    case CHIP_FAMILY_RV740:
883de2362d3Smrg	sq_conf.num_ps_gprs = 84;
884de2362d3Smrg	sq_conf.num_vs_gprs = 36;
885de2362d3Smrg	sq_conf.num_temp_gprs = 4;
886de2362d3Smrg	sq_conf.num_gs_gprs = 0;
887de2362d3Smrg	sq_conf.num_es_gprs = 0;
888de2362d3Smrg	sq_conf.num_ps_threads = 188;
889de2362d3Smrg	sq_conf.num_vs_threads = 60;
890de2362d3Smrg	sq_conf.num_gs_threads = 0;
891de2362d3Smrg	sq_conf.num_es_threads = 0;
892de2362d3Smrg	sq_conf.num_ps_stack_entries = 128;
893de2362d3Smrg	sq_conf.num_vs_stack_entries = 128;
894de2362d3Smrg	sq_conf.num_gs_stack_entries = 0;
895de2362d3Smrg	sq_conf.num_es_stack_entries = 0;
896de2362d3Smrg	break;
897de2362d3Smrg    case CHIP_FAMILY_RV710:
898de2362d3Smrg	sq_conf.num_ps_gprs = 192;
899de2362d3Smrg	sq_conf.num_vs_gprs = 56;
900de2362d3Smrg	sq_conf.num_temp_gprs = 4;
901de2362d3Smrg	sq_conf.num_gs_gprs = 0;
902de2362d3Smrg	sq_conf.num_es_gprs = 0;
903de2362d3Smrg	sq_conf.num_ps_threads = 144;
904de2362d3Smrg	sq_conf.num_vs_threads = 48;
905de2362d3Smrg	sq_conf.num_gs_threads = 0;
906de2362d3Smrg	sq_conf.num_es_threads = 0;
907de2362d3Smrg	sq_conf.num_ps_stack_entries = 128;
908de2362d3Smrg	sq_conf.num_vs_stack_entries = 128;
909de2362d3Smrg	sq_conf.num_gs_stack_entries = 0;
910de2362d3Smrg	sq_conf.num_es_stack_entries = 0;
911de2362d3Smrg	break;
912de2362d3Smrg    }
913de2362d3Smrg
91418781e08Smrg    r600_sq_setup(pScrn, &sq_conf);
915de2362d3Smrg
916de2362d3Smrg    /* set fake reloc for unused depth */
917de2362d3Smrg    BEGIN_BATCH(3 + 2);
91818781e08Smrg    EREG(DB_DEPTH_INFO, 0);
919de2362d3Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
920de2362d3Smrg    END_BATCH();
921de2362d3Smrg
    /*
     * Both branches below write the same five registers; they differ in the
     * GRADIENT_CREDIT value (3 vs 2), R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ
     * (0 vs VS_PC_LIMIT_ENABLE_bit), DB_DEBUG (0x82000000 vs 0) and the
     * DEPTH_CACHELINE_FREE value (16 vs 4).
     */
922de2362d3Smrg    BEGIN_BATCH(80);
923de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770) {
92418781e08Smrg	EREG(TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) |
925de2362d3Smrg			       (28 << TD_FIFO_CREDIT_shift)));
92618781e08Smrg	EREG(VC_ENHANCE, 0);
92718781e08Smrg	EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
92818781e08Smrg	EREG(DB_DEBUG, 0x82000000); /* ? */
92918781e08Smrg	EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
930de2362d3Smrg				 (16 << DEPTH_FLUSH_shift) |
931de2362d3Smrg				 (0 << FORCE_SUMMARIZE_shift) |
932de2362d3Smrg				 (4 << DEPTH_PENDING_FREE_shift) |
933de2362d3Smrg				 (16 << DEPTH_CACHELINE_FREE_shift) |
934de2362d3Smrg				 0));
935de2362d3Smrg    } else {
93618781e08Smrg	EREG(TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) |
937de2362d3Smrg			       (28 << TD_FIFO_CREDIT_shift)));
93818781e08Smrg	EREG(VC_ENHANCE, 0);
93918781e08Smrg	EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
94018781e08Smrg	EREG(DB_DEBUG, 0);
94118781e08Smrg	EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
942de2362d3Smrg				 (16 << DEPTH_FLUSH_shift) |
943de2362d3Smrg				 (0 << FORCE_SUMMARIZE_shift) |
944de2362d3Smrg				 (4 << DEPTH_PENDING_FREE_shift) |
945de2362d3Smrg				 (4 << DEPTH_CACHELINE_FREE_shift) |
946de2362d3Smrg				 0));
947de2362d3Smrg    }
948de2362d3Smrg
94918781e08Smrg    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
95018781e08Smrg    E32(0);
95118781e08Smrg    E32(0);
95218781e08Smrg
95318781e08Smrg    PACK0(SQ_ESGS_RING_ITEMSIZE, 9);
95418781e08Smrg    E32(0); // SQ_ESGS_RING_ITEMSIZE
95518781e08Smrg    E32(0); // SQ_GSVS_RING_ITEMSIZE
95618781e08Smrg    E32(0); // SQ_ESTMP_RING_ITEMSIZE
95718781e08Smrg    E32(0); // SQ_GSTMP_RING_ITEMSIZE
95818781e08Smrg    E32(0); // SQ_VSTMP_RING_ITEMSIZE
95918781e08Smrg    E32(0); // SQ_PSTMP_RING_ITEMSIZE
96018781e08Smrg    E32(0); // SQ_FBUF_RING_ITEMSIZE
96118781e08Smrg    E32(0); // SQ_REDUC_RING_ITEMSIZE
96218781e08Smrg    E32(0); // SQ_GS_VERT_ITEMSIZE
963de2362d3Smrg
964de2362d3Smrg    // DB
96518781e08Smrg    EREG(DB_DEPTH_CONTROL,                    0);
    /* Second value of this pair is FORCE_SHADER_Z_ORDER_bit before RV770, 0 otherwise. */
96618781e08Smrg    PACK0(DB_RENDER_CONTROL, 2);
96718781e08Smrg    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
968de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770)
96918781e08Smrg	E32(FORCE_SHADER_Z_ORDER_bit);
970de2362d3Smrg    else
97118781e08Smrg	E32(0);
97218781e08Smrg    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
973de2362d3Smrg						   (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
974de2362d3Smrg						   (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
975de2362d3Smrg						   (2 << ALPHA_TO_MASK_OFFSET3_shift)));
97618781e08Smrg    EREG(DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
977de2362d3Smrg				 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
978de2362d3Smrg
97918781e08Smrg    PACK0(DB_STENCIL_CLEAR, 2);
98018781e08Smrg    E32(0); // DB_STENCIL_CLEAR
98118781e08Smrg    E32(0); // DB_DEPTH_CLEAR
982de2362d3Smrg
98318781e08Smrg    PACK0(DB_STENCILREFMASK, 3);
98418781e08Smrg    E32(0); // DB_STENCILREFMASK
98518781e08Smrg    E32(0); // DB_STENCILREFMASK_BF
98618781e08Smrg    E32(0); // SX_ALPHA_REF
987de2362d3Smrg
98818781e08Smrg    PACK0(CB_CLRCMP_CONTROL, 4);
98918781e08Smrg    E32(1 << CLRCMP_FCN_SEL_shift);				// CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
99018781e08Smrg    E32(0);							// CB_CLRCMP_SRC
99118781e08Smrg    E32(0);							// CB_CLRCMP_DST
99218781e08Smrg    E32(0);							// CB_CLRCMP_MSK
993de2362d3Smrg
99418781e08Smrg    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
99518781e08Smrg    EREG(R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
996de2362d3Smrg
99718781e08Smrg    PACK0(SX_ALPHA_TEST_CONTROL, 5);
99818781e08Smrg    E32(0); // SX_ALPHA_TEST_CONTROL
99918781e08Smrg    E32(0x00000000); // CB_BLEND_RED
100018781e08Smrg    E32(0x00000000); // CB_BLEND_GREEN
100118781e08Smrg    E32(0x00000000); // CB_BLEND_BLUE
100218781e08Smrg    E32(0x00000000); // CB_BLEND_ALPHA
1003de2362d3Smrg
100418781e08Smrg    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1005de2362d3Smrg						   (0 << WINDOW_Y_OFFSET_shift)));
1006de2362d3Smrg
1007de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770)
100818781e08Smrg	EREG(R7xx_PA_SC_EDGERULE,             0x00000000);
1009de2362d3Smrg    else
101018781e08Smrg	EREG(R7xx_PA_SC_EDGERULE,             0xAAAAAAAA);
1011de2362d3Smrg
101218781e08Smrg    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1013de2362d3Smrg
1014de2362d3Smrg    END_BATCH();
1015de2362d3Smrg
    /* Every clip rectangle and viewport scissor is set to (0,0)-(8192,8192). */
1016de2362d3Smrg    /* clip boolean is set to always visible -> doesn't matter */
1017de2362d3Smrg    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
101818781e08Smrg	r600_set_clip_rect(pScrn, i, 0, 0, 8192, 8192);
1019de2362d3Smrg
1020de2362d3Smrg    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
102118781e08Smrg	r600_set_vport_scissor(pScrn, i, 0, 0, 8192, 8192);
1022de2362d3Smrg
1023de2362d3Smrg    BEGIN_BATCH(49);
102418781e08Smrg    PACK0(PA_SC_MPASS_PS_CNTL, 2);
102518781e08Smrg    E32(0);
1026de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770)
102718781e08Smrg	E32((WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
1028de2362d3Smrg    else
102918781e08Smrg	E32((FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
1030de2362d3Smrg		 0x00500000)); /* ? */
1031de2362d3Smrg
103218781e08Smrg    PACK0(PA_SC_LINE_CNTL, 9);
103318781e08Smrg    E32(0); // PA_SC_LINE_CNTL
103418781e08Smrg    E32(0); // PA_SC_AA_CONFIG
103518781e08Smrg    E32(((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL
1036de2362d3Smrg	     (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
103718781e08Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
103818781e08Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
103918781e08Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
104018781e08Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
104118781e08Smrg    E32(0);                                                 // PA_SC_AA_SAMPLE_LOCS_MCTX
104218781e08Smrg    E32(0);                                                 // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M
104318781e08Smrg
104418781e08Smrg    EREG(PA_SC_AA_MASK,                       0xFFFFFFFF);
104518781e08Smrg
104618781e08Smrg    PACK0(PA_CL_CLIP_CNTL, 5);
104718781e08Smrg    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
104818781e08Smrg    E32(FACE_bit);         // PA_SU_SC_MODE_CNTL
104918781e08Smrg    E32(VTX_XY_FMT_bit);   // PA_CL_VTE_CNTL
105018781e08Smrg    E32(0);                // PA_CL_VS_OUT_CNTL
105118781e08Smrg    E32(0);                // PA_CL_NANINF_CNTL
105218781e08Smrg
105318781e08Smrg    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
105418781e08Smrg    E32(0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL
105518781e08Smrg    E32(0); // PA_SU_POLY_OFFSET_CLAMP
105618781e08Smrg    E32(0); // PA_SU_POLY_OFFSET_FRONT_SCALE
105718781e08Smrg    E32(0); // PA_SU_POLY_OFFSET_FRONT_OFFSET
105818781e08Smrg    E32(0); // PA_SU_POLY_OFFSET_BACK_SCALE
105918781e08Smrg    E32(0); // PA_SU_POLY_OFFSET_BACK_OFFSET
1060de2362d3Smrg
1061de2362d3Smrg    // SPI
1062de2362d3Smrg    if (info->ChipFamily < CHIP_FAMILY_RV770)
106318781e08Smrg	EREG(R7xx_SPI_THREAD_GROUPING,        0);
1064de2362d3Smrg    else
106518781e08Smrg	EREG(R7xx_SPI_THREAD_GROUPING,        (1 << PS_GROUPING_shift));
1066de2362d3Smrg
1067de2362d3Smrg    /* default Interpolator setup */
106818781e08Smrg    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1069de2362d3Smrg			       (1 << SEMANTIC_1_shift)));
107018781e08Smrg    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1071de2362d3Smrg    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
107218781e08Smrg    E32(((0    << SEMANTIC_shift)	|
1073de2362d3Smrg	     (0x01 << DEFAULT_VAL_shift)	|
1074de2362d3Smrg	     SEL_CENTROID_bit));
1075de2362d3Smrg    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
107618781e08Smrg    E32(((1    << SEMANTIC_shift)	|
1077de2362d3Smrg	     (0x01 << DEFAULT_VAL_shift)	|
1078de2362d3Smrg	     SEL_CENTROID_bit));
1079de2362d3Smrg
108018781e08Smrg    PACK0(SPI_INPUT_Z, 4);
108118781e08Smrg    E32(0); // SPI_INPUT_Z
108218781e08Smrg    E32(0); // SPI_FOG_CNTL
108318781e08Smrg    E32(0); // SPI_FOG_FUNC_SCALE
108418781e08Smrg    E32(0); // SPI_FOG_FUNC_BIAS
1085de2362d3Smrg
1086de2362d3Smrg    END_BATCH();
1087de2362d3Smrg
    /* fs_conf is all zero apart from the buffer object set here. */
1088de2362d3Smrg    // clear FS
1089de2362d3Smrg    fs_conf.bo = accel_state->shaders_bo;
109018781e08Smrg    r600_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1091de2362d3Smrg
1092de2362d3Smrg    // VGT
1093de2362d3Smrg    BEGIN_BATCH(46);
109418781e08Smrg    PACK0(VGT_MAX_VTX_INDX, 4);
109518781e08Smrg    E32(0xffffff); // VGT_MAX_VTX_INDX
109618781e08Smrg    E32(0); // VGT_MIN_VTX_INDX
109718781e08Smrg    E32(0); // VGT_INDX_OFFSET
109818781e08Smrg    E32(0); // VGT_MULTI_PRIM_IB_RESET_INDX
109918781e08Smrg
110018781e08Smrg    EREG(VGT_PRIMITIVEID_EN,                  0);
110118781e08Smrg    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
110218781e08Smrg
110318781e08Smrg    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
110418781e08Smrg    E32(0); // VGT_INSTANCE_STEP_RATE_0
110518781e08Smrg    E32(0); // VGT_INSTANCE_STEP_RATE_1
110618781e08Smrg
110718781e08Smrg    PACK0(PA_SU_POINT_SIZE, 17);
110818781e08Smrg    E32(0); // PA_SU_POINT_SIZE
110918781e08Smrg    E32(0); // PA_SU_POINT_MINMAX
111018781e08Smrg    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
111118781e08Smrg    E32(0); // PA_SC_LINE_STIPPLE
111218781e08Smrg    E32(0); // VGT_OUTPUT_PATH_CNTL
111318781e08Smrg    E32(0); // VGT_HOS_CNTL
111418781e08Smrg    E32(0); // VGT_HOS_MAX_TESS_LEVEL
111518781e08Smrg    E32(0); // VGT_HOS_MIN_TESS_LEVEL
111618781e08Smrg    E32(0); // VGT_HOS_REUSE_DEPTH
111718781e08Smrg    E32(0); // VGT_GROUP_PRIM_TYPE
111818781e08Smrg    E32(0); // VGT_GROUP_FIRST_DECR
111918781e08Smrg    E32(0); // VGT_GROUP_DECR
112018781e08Smrg    E32(0); // VGT_GROUP_VECT_0_CNTL
112118781e08Smrg    E32(0); // VGT_GROUP_VECT_1_CNTL
112218781e08Smrg    E32(0); // VGT_GROUP_VECT_0_FMT_CNTL
112318781e08Smrg    E32(0); // VGT_GROUP_VECT_1_FMT_CNTL
112418781e08Smrg    E32(0); // VGT_GS_MODE
112518781e08Smrg
112618781e08Smrg    PACK0(VGT_STRMOUT_EN, 3);
112718781e08Smrg    E32(0); // VGT_STRMOUT_EN
112818781e08Smrg    E32(0); // VGT_REUSE_OFF
112918781e08Smrg    E32(0); // VGT_VTX_CNT_EN
113018781e08Smrg
113118781e08Smrg    EREG(VGT_STRMOUT_BUFFER_EN,               0);
113218781e08Smrg    EREG(SX_MISC,                             0);
1133de2362d3Smrg    END_BATCH();
1134de2362d3Smrg}
1135de2362d3Smrg
1136de2362d3Smrg
1137de2362d3Smrg/*
1138de2362d3Smrg * Commands
1139de2362d3Smrg */
1140de2362d3Smrg
1141de2362d3Smrgvoid
114218781e08Smrgr600_draw_immd(ScrnInfoPtr pScrn, draw_config_t *draw_conf, uint32_t *indices)
1143de2362d3Smrg{
1144de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1145de2362d3Smrg    uint32_t i, count;
1146de2362d3Smrg
1147de2362d3Smrg    // calculate num of packets
1148de2362d3Smrg    count = 2;
1149de2362d3Smrg    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
1150de2362d3Smrg	count += (draw_conf->num_indices + 1) / 2;
1151de2362d3Smrg    else
1152de2362d3Smrg	count += draw_conf->num_indices;
1153de2362d3Smrg
1154de2362d3Smrg    BEGIN_BATCH(8 + count);
115518781e08Smrg    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
115618781e08Smrg    PACK3(IT_INDEX_TYPE, 1);
1157de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
115818781e08Smrg    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1159de2362d3Smrg#else
116018781e08Smrg    E32(draw_conf->index_type);
1161de2362d3Smrg#endif
116218781e08Smrg    PACK3(IT_NUM_INSTANCES, 1);
116318781e08Smrg    E32(draw_conf->num_instances);
1164de2362d3Smrg
116518781e08Smrg    PACK3(IT_DRAW_INDEX_IMMD, count);
116618781e08Smrg    E32(draw_conf->num_indices);
116718781e08Smrg    E32(draw_conf->vgt_draw_initiator);
1168de2362d3Smrg
1169de2362d3Smrg    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
1170de2362d3Smrg	for (i = 0; i < draw_conf->num_indices; i += 2) {
1171de2362d3Smrg	    if ((i + 1) == draw_conf->num_indices)
117218781e08Smrg		E32(indices[i]);
1173de2362d3Smrg	    else
117418781e08Smrg		E32((indices[i] | (indices[i + 1] << 16)));
1175de2362d3Smrg	}
1176de2362d3Smrg    } else {
1177de2362d3Smrg	for (i = 0; i < draw_conf->num_indices; i++)
117818781e08Smrg	    E32(indices[i]);
1179de2362d3Smrg    }
1180de2362d3Smrg    END_BATCH();
1181de2362d3Smrg}
1182de2362d3Smrg
1183de2362d3Smrgvoid
118418781e08Smrgr600_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1185de2362d3Smrg{
1186de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1187de2362d3Smrg
1188de2362d3Smrg    BEGIN_BATCH(10);
118918781e08Smrg    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
119018781e08Smrg    PACK3(IT_INDEX_TYPE, 1);
1191de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
119218781e08Smrg    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1193de2362d3Smrg#else
119418781e08Smrg    E32(draw_conf->index_type);
1195de2362d3Smrg#endif
119618781e08Smrg    PACK3(IT_NUM_INSTANCES, 1);
119718781e08Smrg    E32(draw_conf->num_instances);
119818781e08Smrg    PACK3(IT_DRAW_INDEX_AUTO, 2);
119918781e08Smrg    E32(draw_conf->num_indices);
120018781e08Smrg    E32(draw_conf->vgt_draw_initiator);
1201de2362d3Smrg    END_BATCH();
1202de2362d3Smrg}
1203de2362d3Smrg
1204de2362d3Smrgvoid r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1205de2362d3Smrg{
1206de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1207de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1208de2362d3Smrg    draw_config_t   draw_conf;
1209de2362d3Smrg    vtx_resource_t  vtx_res;
1210de2362d3Smrg
1211de2362d3Smrg    if (accel_state->vbo.vb_start_op == -1)
1212de2362d3Smrg	return;
1213de2362d3Smrg
1214de2362d3Smrg    CLEAR (draw_conf);
1215de2362d3Smrg    CLEAR (vtx_res);
1216de2362d3Smrg
1217de2362d3Smrg    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
121818781e08Smrg        R600IBDiscard(pScrn);
1219de2362d3Smrg	return;
1220de2362d3Smrg    }
1221de2362d3Smrg
1222de2362d3Smrg    /* Vertex buffer setup */
1223de2362d3Smrg    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1224de2362d3Smrg    vtx_res.id              = SQ_VTX_RESOURCE_vs;
1225de2362d3Smrg    vtx_res.vtx_size_dw     = vtx_size / 4;
1226de2362d3Smrg    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1227de2362d3Smrg    vtx_res.mem_req_size    = 1;
122818781e08Smrg    vtx_res.vb_addr         = accel_state->vbo.vb_start_op;
1229de2362d3Smrg    vtx_res.bo              = accel_state->vbo.vb_bo;
1230de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1231de2362d3Smrg    vtx_res.endian          = SQ_ENDIAN_8IN32;
1232de2362d3Smrg#endif
123318781e08Smrg    r600_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1234de2362d3Smrg
1235de2362d3Smrg    /* Draw */
1236de2362d3Smrg    draw_conf.prim_type          = DI_PT_RECTLIST;
1237de2362d3Smrg    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1238de2362d3Smrg    draw_conf.num_instances      = 1;
1239de2362d3Smrg    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1240de2362d3Smrg    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1241de2362d3Smrg
124218781e08Smrg    r600_draw_auto(pScrn, &draw_conf);
1243de2362d3Smrg
1244de2362d3Smrg    /* XXX drm should handle this in fence submit */
124518781e08Smrg    r600_wait_3d_idle_clean(pScrn);
1246de2362d3Smrg
1247de2362d3Smrg    /* sync dst surface */
124818781e08Smrg    r600_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
124918781e08Smrg			     accel_state->dst_size, 0,
1250de2362d3Smrg			     accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1251de2362d3Smrg
1252de2362d3Smrg    accel_state->vbo.vb_start_op = -1;
1253de2362d3Smrg    accel_state->ib_reset_op = 0;
1254de2362d3Smrg
1255de2362d3Smrg}
1256de2362d3Smrg
1257