evergreen_accel.c revision b13dfe66
1921a55d8Smrg/*
2921a55d8Smrg * Copyright 2010 Advanced Micro Devices, Inc.
3921a55d8Smrg *
4921a55d8Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5921a55d8Smrg * copy of this software and associated documentation files (the "Software"),
6921a55d8Smrg * to deal in the Software without restriction, including without limitation
7921a55d8Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8921a55d8Smrg * and/or sell copies of the Software, and to permit persons to whom the
9921a55d8Smrg * Software is furnished to do so, subject to the following conditions:
10921a55d8Smrg *
11921a55d8Smrg * The above copyright notice and this permission notice (including the next
12921a55d8Smrg * paragraph) shall be included in all copies or substantial portions of the
13921a55d8Smrg * Software.
14921a55d8Smrg *
15921a55d8Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16921a55d8Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17921a55d8Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18921a55d8Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19921a55d8Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20921a55d8Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21921a55d8Smrg * SOFTWARE.
22921a55d8Smrg *
23921a55d8Smrg * Authors: Alex Deucher <alexander.deucher@amd.com>
24921a55d8Smrg *
25921a55d8Smrg */
26921a55d8Smrg#ifdef HAVE_CONFIG_H
27921a55d8Smrg#include "config.h"
28921a55d8Smrg#endif
29921a55d8Smrg
30921a55d8Smrg#ifdef XF86DRM_MODE
31921a55d8Smrg
32921a55d8Smrg#include "xf86.h"
33921a55d8Smrg
34921a55d8Smrg#include <errno.h>
35921a55d8Smrg
36921a55d8Smrg#include "radeon.h"
37921a55d8Smrg#include "evergreen_shader.h"
38921a55d8Smrg#include "radeon_reg.h"
39921a55d8Smrg#include "evergreen_reg.h"
40921a55d8Smrg#include "evergreen_state.h"
41921a55d8Smrg
42921a55d8Smrg#include "radeon_drm.h"
43921a55d8Smrg#include "radeon_vbo.h"
44921a55d8Smrg#include "radeon_exa_shared.h"
45921a55d8Smrg
46b13dfe66Smrgstatic const uint32_t EVERGREEN_ROP[16] = {
47b13dfe66Smrg    RADEON_ROP3_ZERO, /* GXclear        */
48b13dfe66Smrg    RADEON_ROP3_DSa,  /* Gxand          */
49b13dfe66Smrg    RADEON_ROP3_SDna, /* GXandReverse   */
50b13dfe66Smrg    RADEON_ROP3_S,    /* GXcopy         */
51b13dfe66Smrg    RADEON_ROP3_DSna, /* GXandInverted  */
52b13dfe66Smrg    RADEON_ROP3_D,    /* GXnoop         */
53b13dfe66Smrg    RADEON_ROP3_DSx,  /* GXxor          */
54b13dfe66Smrg    RADEON_ROP3_DSo,  /* GXor           */
55b13dfe66Smrg    RADEON_ROP3_DSon, /* GXnor          */
56b13dfe66Smrg    RADEON_ROP3_DSxn, /* GXequiv        */
57b13dfe66Smrg    RADEON_ROP3_Dn,   /* GXinvert       */
58b13dfe66Smrg    RADEON_ROP3_SDno, /* GXorReverse    */
59b13dfe66Smrg    RADEON_ROP3_Sn,   /* GXcopyInverted */
60b13dfe66Smrg    RADEON_ROP3_DSno, /* GXorInverted   */
61b13dfe66Smrg    RADEON_ROP3_DSan, /* GXnand         */
62b13dfe66Smrg    RADEON_ROP3_ONE,  /* GXset          */
63b13dfe66Smrg};
64b13dfe66Smrg
65921a55d8Smrgvoid
66921a55d8Smrgevergreen_start_3d(ScrnInfoPtr pScrn)
67921a55d8Smrg{
68921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
69921a55d8Smrg
70921a55d8Smrg    BEGIN_BATCH(3);
71921a55d8Smrg    PACK3(IT_CONTEXT_CONTROL, 2);
72921a55d8Smrg    E32(0x80000000);
73921a55d8Smrg    E32(0x80000000);
74921a55d8Smrg    END_BATCH();
75921a55d8Smrg
76921a55d8Smrg}
77921a55d8Smrg
78921a55d8Smrg/*
79921a55d8Smrg * Setup of functional groups
80921a55d8Smrg */
81921a55d8Smrg
82921a55d8Smrg// asic stack/thread/gpr limits - need to query the drm
83921a55d8Smrgstatic void
84921a55d8Smrgevergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
85921a55d8Smrg{
86921a55d8Smrg    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
87921a55d8Smrg    uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
88921a55d8Smrg    uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
89921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
90921a55d8Smrg
91921a55d8Smrg    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
92921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_PALM) ||
93921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_CAICOS))
94921a55d8Smrg	sq_config = 0;
95921a55d8Smrg    else
96921a55d8Smrg	sq_config = VC_ENABLE_bit;
97921a55d8Smrg
98921a55d8Smrg    sq_config |= (EXPORT_SRC_C_bit |
99921a55d8Smrg		  (sq_conf->cs_prio << CS_PRIO_shift) |
100921a55d8Smrg		  (sq_conf->ls_prio << LS_PRIO_shift) |
101921a55d8Smrg		  (sq_conf->hs_prio << HS_PRIO_shift) |
102921a55d8Smrg		  (sq_conf->ps_prio << PS_PRIO_shift) |
103921a55d8Smrg		  (sq_conf->vs_prio << VS_PRIO_shift) |
104921a55d8Smrg		  (sq_conf->gs_prio << GS_PRIO_shift) |
105921a55d8Smrg		  (sq_conf->es_prio << ES_PRIO_shift));
106921a55d8Smrg
107921a55d8Smrg    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
108921a55d8Smrg			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
109921a55d8Smrg			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
110921a55d8Smrg    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
111921a55d8Smrg			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
112921a55d8Smrg    sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
113921a55d8Smrg			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
114921a55d8Smrg
115921a55d8Smrg    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
116921a55d8Smrg			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
117921a55d8Smrg			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
118921a55d8Smrg			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
119921a55d8Smrg    sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
120921a55d8Smrg				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
121921a55d8Smrg
122921a55d8Smrg    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
123921a55d8Smrg				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
124921a55d8Smrg
125921a55d8Smrg    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
126921a55d8Smrg				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
127921a55d8Smrg
128921a55d8Smrg    sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
129921a55d8Smrg				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
130921a55d8Smrg
131921a55d8Smrg    BEGIN_BATCH(16);
132921a55d8Smrg    /* disable dyn gprs */
133921a55d8Smrg    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
134921a55d8Smrg    PACK0(SQ_CONFIG, 4);
135921a55d8Smrg    E32(sq_config);
136921a55d8Smrg    E32(sq_gpr_resource_mgmt_1);
137921a55d8Smrg    E32(sq_gpr_resource_mgmt_2);
138921a55d8Smrg    E32(sq_gpr_resource_mgmt_3);
139921a55d8Smrg    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
140921a55d8Smrg    E32(sq_thread_resource_mgmt);
141921a55d8Smrg    E32(sq_thread_resource_mgmt_2);
142921a55d8Smrg    E32(sq_stack_resource_mgmt_1);
143921a55d8Smrg    E32(sq_stack_resource_mgmt_2);
144921a55d8Smrg    E32(sq_stack_resource_mgmt_3);
145921a55d8Smrg    END_BATCH();
146921a55d8Smrg}
147921a55d8Smrg
148921a55d8Smrgvoid
149921a55d8Smrgevergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
150921a55d8Smrg{
151b13dfe66Smrg    uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim;
152921a55d8Smrg    int pitch, slice, h;
153921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
154921a55d8Smrg
155921a55d8Smrg    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
156921a55d8Smrg		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
157921a55d8Smrg		     (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
158921a55d8Smrg		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
159921a55d8Smrg		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
160921a55d8Smrg		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
161921a55d8Smrg		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
162921a55d8Smrg    if (cb_conf->blend_clamp)
163921a55d8Smrg	cb_color_info |= BLEND_CLAMP_bit;
164921a55d8Smrg    if (cb_conf->fast_clear)
165921a55d8Smrg	cb_color_info |= FAST_CLEAR_bit;
166921a55d8Smrg    if (cb_conf->compression)
167921a55d8Smrg	cb_color_info |= COMPRESSION_bit;
168921a55d8Smrg    if (cb_conf->blend_bypass)
169921a55d8Smrg	cb_color_info |= BLEND_BYPASS_bit;
170921a55d8Smrg    if (cb_conf->simple_float)
171921a55d8Smrg	cb_color_info |= SIMPLE_FLOAT_bit;
172921a55d8Smrg    if (cb_conf->round_mode)
173921a55d8Smrg	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
174921a55d8Smrg    if (cb_conf->tile_compact)
175921a55d8Smrg	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
176921a55d8Smrg    if (cb_conf->rat)
177921a55d8Smrg	cb_color_info |= RAT_bit;
178921a55d8Smrg
179921a55d8Smrg    /* bit 4 needs to be set for linear and depth/stencil surfaces */
180b13dfe66Smrg    if (cb_conf->non_disp_tiling)
181b13dfe66Smrg	cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
182921a55d8Smrg
183921a55d8Smrg    pitch = (cb_conf->w / 8) - 1;
184921a55d8Smrg    h = RADEON_ALIGN(cb_conf->h, 8);
185921a55d8Smrg    slice = ((cb_conf->w * h) / 64) - 1;
186921a55d8Smrg
187921a55d8Smrg    switch (cb_conf->resource_type) {
188921a55d8Smrg    case BUFFER:
189921a55d8Smrg	/* number of elements in the surface */
190921a55d8Smrg	cb_color_dim = pitch * slice;
191921a55d8Smrg	break;
192921a55d8Smrg    default:
193921a55d8Smrg	/* w/h of the surface */
194921a55d8Smrg	cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) |
195921a55d8Smrg			((cb_conf->h - 1) << HEIGHT_MAX_shift));
196921a55d8Smrg	break;
197921a55d8Smrg    }
198921a55d8Smrg
199921a55d8Smrg    BEGIN_BATCH(3 + 2);
200921a55d8Smrg    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
201921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
202921a55d8Smrg    END_BATCH();
203921a55d8Smrg
204921a55d8Smrg    /* Set CMASK & FMASK buffer to the offset of color buffer as
205921a55d8Smrg     * we don't use those this shouldn't cause any issue and we
206921a55d8Smrg     * then have a valid cmd stream
207921a55d8Smrg     */
208921a55d8Smrg    BEGIN_BATCH(3 + 2);
209921a55d8Smrg    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
210921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
211921a55d8Smrg    END_BATCH();
212921a55d8Smrg    BEGIN_BATCH(3 + 2);
213921a55d8Smrg    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
214921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
215921a55d8Smrg    END_BATCH();
216921a55d8Smrg
217921a55d8Smrg    /* tiling config */
218921a55d8Smrg    BEGIN_BATCH(3 + 2);
219921a55d8Smrg    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
220921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
221921a55d8Smrg    END_BATCH();
222921a55d8Smrg    BEGIN_BATCH(3 + 2);
223921a55d8Smrg    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
224921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
225921a55d8Smrg    END_BATCH();
226921a55d8Smrg
227b13dfe66Smrg    BEGIN_BATCH(33);
228921a55d8Smrg    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
229921a55d8Smrg    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
230921a55d8Smrg    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
231921a55d8Smrg    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
232921a55d8Smrg    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
233921a55d8Smrg    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
234921a55d8Smrg    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
235921a55d8Smrg    E32(0);
236921a55d8Smrg    E32(0);
237921a55d8Smrg    E32(0);
238921a55d8Smrg    E32(0);
239b13dfe66Smrg    EREG(CB_TARGET_MASK,                      (cb_conf->pmask << TARGET0_ENABLE_shift));
240b13dfe66Smrg    EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[cb_conf->rop] |
241b13dfe66Smrg					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
242b13dfe66Smrg    EREG(CB_BLEND0_CONTROL,                   cb_conf->blendcntl);
243921a55d8Smrg    END_BATCH();
244b13dfe66Smrg
245921a55d8Smrg}
246921a55d8Smrg
247921a55d8Smrgstatic void
248921a55d8Smrgevergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
249921a55d8Smrg			      uint32_t size, uint64_t mc_addr,
250921a55d8Smrg			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
251921a55d8Smrg{
252921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
253921a55d8Smrg    uint32_t cp_coher_size;
254921a55d8Smrg    if (size == 0xffffffff)
255921a55d8Smrg	cp_coher_size = 0xffffffff;
256921a55d8Smrg    else
257921a55d8Smrg	cp_coher_size = ((size + 255) >> 8);
258921a55d8Smrg
259921a55d8Smrg    BEGIN_BATCH(5 + 2);
260921a55d8Smrg    PACK3(IT_SURFACE_SYNC, 4);
261921a55d8Smrg    E32(sync_type);
262921a55d8Smrg    E32(cp_coher_size);
263921a55d8Smrg    E32((mc_addr >> 8));
264921a55d8Smrg    E32(10); /* poll interval */
265921a55d8Smrg    RELOC_BATCH(bo, rdomains, wdomain);
266921a55d8Smrg    END_BATCH();
267921a55d8Smrg}
268921a55d8Smrg
269921a55d8Smrg/* inserts a wait for vline in the command stream */
270921a55d8Smrgvoid evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
271921a55d8Smrg				  xf86CrtcPtr crtc, int start, int stop)
272921a55d8Smrg{
273921a55d8Smrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
274921a55d8Smrg    drmmode_crtc_private_ptr drmmode_crtc;
275921a55d8Smrg    uint32_t offset;
276921a55d8Smrg
277921a55d8Smrg    if (!crtc)
278921a55d8Smrg        return;
279921a55d8Smrg
280921a55d8Smrg    drmmode_crtc = crtc->driver_private;
281921a55d8Smrg
282921a55d8Smrg    if (stop < start)
283921a55d8Smrg        return;
284921a55d8Smrg
285921a55d8Smrg    if (!crtc->enabled)
286921a55d8Smrg        return;
287921a55d8Smrg
288921a55d8Smrg    if (info->cs) {
289921a55d8Smrg        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
290921a55d8Smrg	    return;
291921a55d8Smrg    } else {
292921a55d8Smrg#ifdef USE_EXA
293921a55d8Smrg	if (info->useEXA)
294921a55d8Smrg	    offset = exaGetPixmapOffset(pPix);
295921a55d8Smrg	else
296921a55d8Smrg#endif
297921a55d8Smrg	    offset = pPix->devPrivate.ptr - info->FB;
298921a55d8Smrg
299921a55d8Smrg	/* if drawing to front buffer */
300921a55d8Smrg	if (offset != 0)
301921a55d8Smrg	    return;
302921a55d8Smrg    }
303921a55d8Smrg
304921a55d8Smrg    start = max(start, 0);
305921a55d8Smrg    stop = min(stop, crtc->mode.VDisplay);
306921a55d8Smrg
307921a55d8Smrg    if (start > crtc->mode.VDisplay)
308921a55d8Smrg        return;
309921a55d8Smrg
310921a55d8Smrg    BEGIN_BATCH(11);
311921a55d8Smrg    /* set the VLINE range */
312921a55d8Smrg    EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
313921a55d8Smrg	 (start << EVERGREEN_VLINE_START_SHIFT) |
314921a55d8Smrg	 (stop << EVERGREEN_VLINE_END_SHIFT));
315921a55d8Smrg
316921a55d8Smrg    /* tell the CP to poll the VLINE state register */
317921a55d8Smrg    PACK3(IT_WAIT_REG_MEM, 6);
318921a55d8Smrg    E32(IT_WAIT_REG | IT_WAIT_EQ);
319921a55d8Smrg    E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
320921a55d8Smrg    E32(0);
321921a55d8Smrg    E32(0);                          // Ref value
322921a55d8Smrg    E32(EVERGREEN_VLINE_STAT);    // Mask
323921a55d8Smrg    E32(10);                         // Wait interval
324921a55d8Smrg    /* add crtc reloc */
325921a55d8Smrg    PACK3(IT_NOP, 1);
326921a55d8Smrg    E32(drmmode_crtc->mode_crtc->crtc_id);
327921a55d8Smrg    END_BATCH();
328921a55d8Smrg}
329921a55d8Smrg
330b13dfe66Smrgvoid
331b13dfe66Smrgevergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
332b13dfe66Smrg{
333b13dfe66Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
334b13dfe66Smrg
335b13dfe66Smrg    BEGIN_BATCH(8);
336b13dfe66Smrg    /* Interpolator setup */
337b13dfe66Smrg    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
338b13dfe66Smrg    PACK0(SPI_PS_IN_CONTROL_0, 3);
339b13dfe66Smrg    E32(((num_interp << NUM_INTERP_shift) |
340b13dfe66Smrg	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
341b13dfe66Smrg    E32(0); // SPI_PS_IN_CONTROL_1
342b13dfe66Smrg    E32(0); // SPI_INTERP_CONTROL_0
343b13dfe66Smrg    END_BATCH();
344b13dfe66Smrg}
345b13dfe66Smrg
346921a55d8Smrgvoid
347921a55d8Smrgevergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
348921a55d8Smrg{
349921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
350921a55d8Smrg    uint32_t sq_pgm_resources;
351921a55d8Smrg
352921a55d8Smrg    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
353921a55d8Smrg			(fs_conf->stack_size << STACK_SIZE_shift));
354921a55d8Smrg
355921a55d8Smrg    if (fs_conf->dx10_clamp)
356921a55d8Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
357921a55d8Smrg
358921a55d8Smrg    BEGIN_BATCH(3 + 2);
359921a55d8Smrg    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
360921a55d8Smrg    RELOC_BATCH(fs_conf->bo, domain, 0);
361921a55d8Smrg    END_BATCH();
362921a55d8Smrg
363921a55d8Smrg    BEGIN_BATCH(3);
364921a55d8Smrg    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
365921a55d8Smrg    END_BATCH();
366921a55d8Smrg}
367921a55d8Smrg
368921a55d8Smrgvoid
369921a55d8Smrgevergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
370921a55d8Smrg{
371921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
372921a55d8Smrg    uint32_t sq_pgm_resources, sq_pgm_resources_2;
373921a55d8Smrg
374921a55d8Smrg    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
375921a55d8Smrg			(vs_conf->stack_size << STACK_SIZE_shift));
376921a55d8Smrg
377921a55d8Smrg    if (vs_conf->dx10_clamp)
378921a55d8Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
379921a55d8Smrg    if (vs_conf->uncached_first_inst)
380921a55d8Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
381921a55d8Smrg
382921a55d8Smrg    sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
383921a55d8Smrg			  (vs_conf->double_round << DOUBLE_ROUND_shift));
384921a55d8Smrg
385921a55d8Smrg    if (vs_conf->allow_sdi)
386921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
387921a55d8Smrg    if (vs_conf->allow_sd0)
388921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
389921a55d8Smrg    if (vs_conf->allow_ddi)
390921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
391921a55d8Smrg    if (vs_conf->allow_ddo)
392921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
393921a55d8Smrg
394921a55d8Smrg    /* flush SQ cache */
395921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
396921a55d8Smrg				  vs_conf->shader_size, vs_conf->shader_addr,
397921a55d8Smrg				  vs_conf->bo, domain, 0);
398921a55d8Smrg
399921a55d8Smrg    BEGIN_BATCH(3 + 2);
400921a55d8Smrg    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
401921a55d8Smrg    RELOC_BATCH(vs_conf->bo, domain, 0);
402921a55d8Smrg    END_BATCH();
403921a55d8Smrg
404921a55d8Smrg    BEGIN_BATCH(4);
405921a55d8Smrg    PACK0(SQ_PGM_RESOURCES_VS, 2);
406921a55d8Smrg    E32(sq_pgm_resources);
407921a55d8Smrg    E32(sq_pgm_resources_2);
408921a55d8Smrg    END_BATCH();
409921a55d8Smrg}
410921a55d8Smrg
411921a55d8Smrgvoid
412921a55d8Smrgevergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
413921a55d8Smrg{
414921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
415921a55d8Smrg    uint32_t sq_pgm_resources, sq_pgm_resources_2;
416921a55d8Smrg
417921a55d8Smrg    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
418921a55d8Smrg			(ps_conf->stack_size << STACK_SIZE_shift));
419921a55d8Smrg
420921a55d8Smrg    if (ps_conf->dx10_clamp)
421921a55d8Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
422921a55d8Smrg    if (ps_conf->uncached_first_inst)
423921a55d8Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
424921a55d8Smrg    if (ps_conf->clamp_consts)
425921a55d8Smrg	sq_pgm_resources |= CLAMP_CONSTS_bit;
426921a55d8Smrg
427921a55d8Smrg    sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
428921a55d8Smrg			  (ps_conf->double_round << DOUBLE_ROUND_shift));
429921a55d8Smrg
430921a55d8Smrg    if (ps_conf->allow_sdi)
431921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
432921a55d8Smrg    if (ps_conf->allow_sd0)
433921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
434921a55d8Smrg    if (ps_conf->allow_ddi)
435921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
436921a55d8Smrg    if (ps_conf->allow_ddo)
437921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
438921a55d8Smrg
439921a55d8Smrg    /* flush SQ cache */
440921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
441921a55d8Smrg				  ps_conf->shader_size, ps_conf->shader_addr,
442921a55d8Smrg				  ps_conf->bo, domain, 0);
443921a55d8Smrg
444921a55d8Smrg    BEGIN_BATCH(3 + 2);
445921a55d8Smrg    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
446921a55d8Smrg    RELOC_BATCH(ps_conf->bo, domain, 0);
447921a55d8Smrg    END_BATCH();
448921a55d8Smrg
449921a55d8Smrg    BEGIN_BATCH(5);
450921a55d8Smrg    PACK0(SQ_PGM_RESOURCES_PS, 3);
451921a55d8Smrg    E32(sq_pgm_resources);
452921a55d8Smrg    E32(sq_pgm_resources_2);
453921a55d8Smrg    E32(ps_conf->export_mode);
454921a55d8Smrg    END_BATCH();
455921a55d8Smrg}
456921a55d8Smrg
457921a55d8Smrgvoid
458921a55d8Smrgevergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
459921a55d8Smrg{
460921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
461921a55d8Smrg    /* size reg is units of 16 consts (4 dwords each) */
462921a55d8Smrg    uint32_t size = const_conf->size_bytes >> 8;
463921a55d8Smrg
464921a55d8Smrg    if (size == 0)
465921a55d8Smrg	size = 1;
466921a55d8Smrg
467921a55d8Smrg    /* flush SQ cache */
468921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
469921a55d8Smrg				  const_conf->size_bytes, const_conf->const_addr,
470921a55d8Smrg				  const_conf->bo, domain, 0);
471921a55d8Smrg
472921a55d8Smrg    switch (const_conf->type) {
473921a55d8Smrg    case SHADER_TYPE_VS:
474921a55d8Smrg	BEGIN_BATCH(3);
475921a55d8Smrg	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
476921a55d8Smrg	END_BATCH();
477921a55d8Smrg	BEGIN_BATCH(3 + 2);
478921a55d8Smrg	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
479921a55d8Smrg	RELOC_BATCH(const_conf->bo, domain, 0);
480921a55d8Smrg	END_BATCH();
481921a55d8Smrg	break;
482921a55d8Smrg    case SHADER_TYPE_PS:
483921a55d8Smrg	BEGIN_BATCH(3);
484921a55d8Smrg	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
485921a55d8Smrg	END_BATCH();
486921a55d8Smrg	BEGIN_BATCH(3 + 2);
487921a55d8Smrg	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
488921a55d8Smrg	RELOC_BATCH(const_conf->bo, domain, 0);
489921a55d8Smrg	END_BATCH();
490921a55d8Smrg	break;
491921a55d8Smrg    default:
492921a55d8Smrg	ErrorF("Unsupported const type %d\n", const_conf->type);
493921a55d8Smrg	break;
494921a55d8Smrg    }
495921a55d8Smrg
496921a55d8Smrg}
497921a55d8Smrg
498921a55d8Smrgvoid
499921a55d8Smrgevergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
500921a55d8Smrg{
501921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
502921a55d8Smrg    /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
503921a55d8Smrg     * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
504921a55d8Smrg     */
505921a55d8Smrg    BEGIN_BATCH(3);
506921a55d8Smrg    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
507921a55d8Smrg    END_BATCH();
508921a55d8Smrg}
509921a55d8Smrg
510921a55d8Smrgstatic void
511921a55d8Smrgevergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
512921a55d8Smrg{
513921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
514921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
515921a55d8Smrg    uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
516921a55d8Smrg
517921a55d8Smrg    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
518921a55d8Smrg			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
519921a55d8Smrg			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
520921a55d8Smrg			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
521921a55d8Smrg			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
522921a55d8Smrg    if (res->clamp_x)
523921a55d8Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
524921a55d8Smrg
525921a55d8Smrg    if (res->format_comp_all)
526921a55d8Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
527921a55d8Smrg
528921a55d8Smrg    if (res->srf_mode_all)
529921a55d8Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
530921a55d8Smrg
531921a55d8Smrg    sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
532921a55d8Smrg			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
533921a55d8Smrg			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
534921a55d8Smrg			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
535921a55d8Smrg
536921a55d8Smrg    if (res->uncached)
537921a55d8Smrg	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
538921a55d8Smrg
539921a55d8Smrg    /* XXX ??? */
540921a55d8Smrg    sq_vtx_constant_word4 = 0;
541921a55d8Smrg
542921a55d8Smrg    /* flush vertex cache */
543921a55d8Smrg    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
544921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_PALM) ||
545921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_CAICOS))
546921a55d8Smrg	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
547921a55d8Smrg				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
548921a55d8Smrg				      res->bo,
549921a55d8Smrg				      domain, 0);
550921a55d8Smrg    else
551921a55d8Smrg	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
552921a55d8Smrg				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
553921a55d8Smrg				      res->bo,
554921a55d8Smrg				      domain, 0);
555921a55d8Smrg
556921a55d8Smrg    BEGIN_BATCH(10 + 2);
557921a55d8Smrg    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
558921a55d8Smrg    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
559921a55d8Smrg    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
560921a55d8Smrg    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
561921a55d8Smrg    E32(sq_vtx_constant_word3);		// 3: swizzles
562921a55d8Smrg    E32(sq_vtx_constant_word4);		// 4: num elements
563921a55d8Smrg    E32(0);							// 5: n/a
564921a55d8Smrg    E32(0);							// 6: n/a
565921a55d8Smrg    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
566921a55d8Smrg    RELOC_BATCH(res->bo, domain, 0);
567921a55d8Smrg    END_BATCH();
568921a55d8Smrg}
569921a55d8Smrg
570921a55d8Smrgvoid
571921a55d8Smrgevergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
572921a55d8Smrg{
573921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
574921a55d8Smrg    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
575921a55d8Smrg    uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
576921a55d8Smrg
577921a55d8Smrg    sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
578921a55d8Smrg
579921a55d8Smrg    if (tex_res->w)
580921a55d8Smrg	sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
581921a55d8Smrg				  ((tex_res->w - 1) << TEX_WIDTH_shift));
582921a55d8Smrg
583921a55d8Smrg    if (tex_res->tile_type)
584921a55d8Smrg	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
585921a55d8Smrg
586921a55d8Smrg    sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
587921a55d8Smrg
588921a55d8Smrg    if (tex_res->h)
589921a55d8Smrg	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
590921a55d8Smrg    if (tex_res->depth)
591921a55d8Smrg	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
592921a55d8Smrg
593921a55d8Smrg    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
594921a55d8Smrg			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
595921a55d8Smrg			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
596921a55d8Smrg			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
597921a55d8Smrg			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
598921a55d8Smrg			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
599921a55d8Smrg			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
600921a55d8Smrg			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
601921a55d8Smrg			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
602921a55d8Smrg			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
603921a55d8Smrg			     (tex_res->base_level << BASE_LEVEL_shift));
604921a55d8Smrg
605921a55d8Smrg    if (tex_res->srf_mode_all)
606921a55d8Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
607921a55d8Smrg    if (tex_res->force_degamma)
608921a55d8Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
609921a55d8Smrg
610921a55d8Smrg    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
611921a55d8Smrg			     (tex_res->base_array << BASE_ARRAY_shift) |
612921a55d8Smrg			     (tex_res->last_array << LAST_ARRAY_shift));
613921a55d8Smrg
614921a55d8Smrg    sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
615921a55d8Smrg			     (tex_res->perf_modulation << PERF_MODULATION_shift));
616921a55d8Smrg
617921a55d8Smrg    if (tex_res->interlaced)
618921a55d8Smrg	sq_tex_resource_word6 |= INTERLACED_bit;
619921a55d8Smrg
620921a55d8Smrg    sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
621921a55d8Smrg			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
622921a55d8Smrg
623921a55d8Smrg    /* flush texture cache */
624921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
625921a55d8Smrg				  tex_res->size, tex_res->base,
626921a55d8Smrg				  tex_res->bo, domain, 0);
627921a55d8Smrg
628921a55d8Smrg    BEGIN_BATCH(10 + 4);
629921a55d8Smrg    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
630921a55d8Smrg    E32(sq_tex_resource_word0);
631921a55d8Smrg    E32(sq_tex_resource_word1);
632921a55d8Smrg    E32(((tex_res->base) >> 8));
633921a55d8Smrg    E32(((tex_res->mip_base) >> 8));
634921a55d8Smrg    E32(sq_tex_resource_word4);
635921a55d8Smrg    E32(sq_tex_resource_word5);
636921a55d8Smrg    E32(sq_tex_resource_word6);
637921a55d8Smrg    E32(sq_tex_resource_word7);
638921a55d8Smrg    RELOC_BATCH(tex_res->bo, domain, 0);
639921a55d8Smrg    RELOC_BATCH(tex_res->mip_bo, domain, 0);
640921a55d8Smrg    END_BATCH();
641921a55d8Smrg}
642921a55d8Smrg
643921a55d8Smrgvoid
644921a55d8Smrgevergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
645921a55d8Smrg{
646921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
647921a55d8Smrg    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
648921a55d8Smrg
649921a55d8Smrg    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
650921a55d8Smrg			    (s->clamp_y       << CLAMP_Y_shift)					|
651921a55d8Smrg			    (s->clamp_z       << CLAMP_Z_shift)					|
652921a55d8Smrg			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
653921a55d8Smrg			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
654921a55d8Smrg			    (s->z_filter      << Z_FILTER_shift)	|
655921a55d8Smrg			    (s->mip_filter    << MIP_FILTER_shift)				|
656921a55d8Smrg			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
657921a55d8Smrg			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
658921a55d8Smrg			    (s->chroma_key    << CHROMA_KEY_shift));
659921a55d8Smrg
660921a55d8Smrg    sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
661921a55d8Smrg			    (s->max_lod       << MAX_LOD_shift)					|
662921a55d8Smrg			    (s->perf_mip      << PERF_MIP_shift)	|
663921a55d8Smrg			    (s->perf_z        << PERF_Z_shift));
664921a55d8Smrg
665921a55d8Smrg
666921a55d8Smrg    sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
667921a55d8Smrg			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
668921a55d8Smrg
669921a55d8Smrg    if (s->mc_coord_truncate)
670921a55d8Smrg	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
671921a55d8Smrg    if (s->force_degamma)
672921a55d8Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
673921a55d8Smrg    if (s->truncate_coord)
674921a55d8Smrg	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
675921a55d8Smrg    if (s->disable_cube_wrap)
676921a55d8Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
677921a55d8Smrg    if (s->type)
678921a55d8Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
679921a55d8Smrg
680921a55d8Smrg    BEGIN_BATCH(5);
681921a55d8Smrg    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
682921a55d8Smrg    E32(sq_tex_sampler_word0);
683921a55d8Smrg    E32(sq_tex_sampler_word1);
684921a55d8Smrg    E32(sq_tex_sampler_word2);
685921a55d8Smrg    END_BATCH();
686921a55d8Smrg}
687921a55d8Smrg
688921a55d8Smrg//XXX deal with clip offsets in clip setup
689921a55d8Smrgvoid
690921a55d8Smrgevergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
691921a55d8Smrg{
692921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
693921a55d8Smrg
694921a55d8Smrg    BEGIN_BATCH(4);
695921a55d8Smrg    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
696921a55d8Smrg    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
697921a55d8Smrg	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
698921a55d8Smrg    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
699921a55d8Smrg	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
700921a55d8Smrg    END_BATCH();
701921a55d8Smrg}
702921a55d8Smrg
703921a55d8Smrgvoid
704921a55d8Smrgevergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
705921a55d8Smrg{
706921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
707921a55d8Smrg
708921a55d8Smrg    BEGIN_BATCH(4);
709921a55d8Smrg    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
710921a55d8Smrg    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
711921a55d8Smrg	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
712921a55d8Smrg	 WINDOW_OFFSET_DISABLE_bit));
713921a55d8Smrg    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
714921a55d8Smrg	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
715921a55d8Smrg    END_BATCH();
716921a55d8Smrg}
717921a55d8Smrg
718921a55d8Smrgvoid
719921a55d8Smrgevergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
720921a55d8Smrg{
721921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
722921a55d8Smrg
723921a55d8Smrg    BEGIN_BATCH(4);
724921a55d8Smrg    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
725921a55d8Smrg    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
726921a55d8Smrg	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
727921a55d8Smrg	 WINDOW_OFFSET_DISABLE_bit));
728921a55d8Smrg    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
729921a55d8Smrg	 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
730921a55d8Smrg    END_BATCH();
731921a55d8Smrg}
732921a55d8Smrg
733921a55d8Smrgvoid
734921a55d8Smrgevergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
735921a55d8Smrg{
736921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
737921a55d8Smrg
738921a55d8Smrg    BEGIN_BATCH(4);
739921a55d8Smrg    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
740921a55d8Smrg    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
741921a55d8Smrg	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
742921a55d8Smrg	 WINDOW_OFFSET_DISABLE_bit));
743921a55d8Smrg    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
744921a55d8Smrg	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
745921a55d8Smrg    END_BATCH();
746921a55d8Smrg}
747921a55d8Smrg
748921a55d8Smrgvoid
749921a55d8Smrgevergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
750921a55d8Smrg{
751921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
752921a55d8Smrg
753921a55d8Smrg    BEGIN_BATCH(4);
754921a55d8Smrg    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
755921a55d8Smrg    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
756921a55d8Smrg	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
757921a55d8Smrg    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
758921a55d8Smrg	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
759921a55d8Smrg    END_BATCH();
760921a55d8Smrg}
761921a55d8Smrg
762921a55d8Smrg/*
763921a55d8Smrg * Setup of default state
764921a55d8Smrg */
765921a55d8Smrg
766921a55d8Smrgvoid
767921a55d8Smrgevergreen_set_default_state(ScrnInfoPtr pScrn)
768921a55d8Smrg{
769921a55d8Smrg    tex_resource_t tex_res;
770921a55d8Smrg    shader_config_t fs_conf;
771921a55d8Smrg    sq_config_t sq_conf;
772921a55d8Smrg    int i;
773921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
774921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
775921a55d8Smrg
776921a55d8Smrg    if (accel_state->XInited3D)
777921a55d8Smrg	return;
778921a55d8Smrg
779921a55d8Smrg    memset(&tex_res, 0, sizeof(tex_resource_t));
780921a55d8Smrg    memset(&fs_conf, 0, sizeof(shader_config_t));
781921a55d8Smrg
782921a55d8Smrg    accel_state->XInited3D = TRUE;
783921a55d8Smrg
784921a55d8Smrg    evergreen_start_3d(pScrn);
785921a55d8Smrg
786921a55d8Smrg    /* SQ */
787921a55d8Smrg    sq_conf.ps_prio = 0;
788921a55d8Smrg    sq_conf.vs_prio = 1;
789921a55d8Smrg    sq_conf.gs_prio = 2;
790921a55d8Smrg    sq_conf.es_prio = 3;
791921a55d8Smrg    sq_conf.hs_prio = 0;
792921a55d8Smrg    sq_conf.ls_prio = 0;
793921a55d8Smrg    sq_conf.cs_prio = 0;
794921a55d8Smrg
795921a55d8Smrg    switch (info->ChipFamily) {
796921a55d8Smrg    case CHIP_FAMILY_CEDAR:
797921a55d8Smrg    default:
798921a55d8Smrg	sq_conf.num_ps_gprs = 93;
799921a55d8Smrg	sq_conf.num_vs_gprs = 46;
800921a55d8Smrg	sq_conf.num_temp_gprs = 4;
801921a55d8Smrg	sq_conf.num_gs_gprs = 31;
802921a55d8Smrg	sq_conf.num_es_gprs = 31;
803921a55d8Smrg	sq_conf.num_hs_gprs = 23;
804921a55d8Smrg	sq_conf.num_ls_gprs = 23;
805921a55d8Smrg	sq_conf.num_ps_threads = 96;
806921a55d8Smrg	sq_conf.num_vs_threads = 16;
807921a55d8Smrg	sq_conf.num_gs_threads = 16;
808921a55d8Smrg	sq_conf.num_es_threads = 16;
809921a55d8Smrg	sq_conf.num_hs_threads = 16;
810921a55d8Smrg	sq_conf.num_ls_threads = 16;
811921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
812921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
813921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
814921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
815921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
816921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
817921a55d8Smrg	break;
818921a55d8Smrg    case CHIP_FAMILY_REDWOOD:
819921a55d8Smrg	sq_conf.num_ps_gprs = 93;
820921a55d8Smrg	sq_conf.num_vs_gprs = 46;
821921a55d8Smrg	sq_conf.num_temp_gprs = 4;
822921a55d8Smrg	sq_conf.num_gs_gprs = 31;
823921a55d8Smrg	sq_conf.num_es_gprs = 31;
824921a55d8Smrg	sq_conf.num_hs_gprs = 23;
825921a55d8Smrg	sq_conf.num_ls_gprs = 23;
826921a55d8Smrg	sq_conf.num_ps_threads = 128;
827921a55d8Smrg	sq_conf.num_vs_threads = 20;
828921a55d8Smrg	sq_conf.num_gs_threads = 20;
829921a55d8Smrg	sq_conf.num_es_threads = 20;
830921a55d8Smrg	sq_conf.num_hs_threads = 20;
831921a55d8Smrg	sq_conf.num_ls_threads = 20;
832921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
833921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
834921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
835921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
836921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
837921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
838921a55d8Smrg	break;
839921a55d8Smrg    case CHIP_FAMILY_JUNIPER:
840921a55d8Smrg	sq_conf.num_ps_gprs = 93;
841921a55d8Smrg	sq_conf.num_vs_gprs = 46;
842921a55d8Smrg	sq_conf.num_temp_gprs = 4;
843921a55d8Smrg	sq_conf.num_gs_gprs = 31;
844921a55d8Smrg	sq_conf.num_es_gprs = 31;
845921a55d8Smrg	sq_conf.num_hs_gprs = 23;
846921a55d8Smrg	sq_conf.num_ls_gprs = 23;
847921a55d8Smrg	sq_conf.num_ps_threads = 128;
848921a55d8Smrg	sq_conf.num_vs_threads = 20;
849921a55d8Smrg	sq_conf.num_gs_threads = 20;
850921a55d8Smrg	sq_conf.num_es_threads = 20;
851921a55d8Smrg	sq_conf.num_hs_threads = 20;
852921a55d8Smrg	sq_conf.num_ls_threads = 20;
853921a55d8Smrg	sq_conf.num_ps_stack_entries = 85;
854921a55d8Smrg	sq_conf.num_vs_stack_entries = 85;
855921a55d8Smrg	sq_conf.num_gs_stack_entries = 85;
856921a55d8Smrg	sq_conf.num_es_stack_entries = 85;
857921a55d8Smrg	sq_conf.num_hs_stack_entries = 85;
858921a55d8Smrg	sq_conf.num_ls_stack_entries = 85;
859921a55d8Smrg	break;
860921a55d8Smrg    case CHIP_FAMILY_CYPRESS:
861921a55d8Smrg    case CHIP_FAMILY_HEMLOCK:
862921a55d8Smrg	sq_conf.num_ps_gprs = 93;
863921a55d8Smrg	sq_conf.num_vs_gprs = 46;
864921a55d8Smrg	sq_conf.num_temp_gprs = 4;
865921a55d8Smrg	sq_conf.num_gs_gprs = 31;
866921a55d8Smrg	sq_conf.num_es_gprs = 31;
867921a55d8Smrg	sq_conf.num_hs_gprs = 23;
868921a55d8Smrg	sq_conf.num_ls_gprs = 23;
869921a55d8Smrg	sq_conf.num_ps_threads = 128;
870921a55d8Smrg	sq_conf.num_vs_threads = 20;
871921a55d8Smrg	sq_conf.num_gs_threads = 20;
872921a55d8Smrg	sq_conf.num_es_threads = 20;
873921a55d8Smrg	sq_conf.num_hs_threads = 20;
874921a55d8Smrg	sq_conf.num_ls_threads = 20;
875921a55d8Smrg	sq_conf.num_ps_stack_entries = 85;
876921a55d8Smrg	sq_conf.num_vs_stack_entries = 85;
877921a55d8Smrg	sq_conf.num_gs_stack_entries = 85;
878921a55d8Smrg	sq_conf.num_es_stack_entries = 85;
879921a55d8Smrg	sq_conf.num_hs_stack_entries = 85;
880921a55d8Smrg	sq_conf.num_ls_stack_entries = 85;
881921a55d8Smrg	break;
882921a55d8Smrg    case CHIP_FAMILY_PALM:
883921a55d8Smrg	sq_conf.num_ps_gprs = 93;
884921a55d8Smrg	sq_conf.num_vs_gprs = 46;
885921a55d8Smrg	sq_conf.num_temp_gprs = 4;
886921a55d8Smrg	sq_conf.num_gs_gprs = 31;
887921a55d8Smrg	sq_conf.num_es_gprs = 31;
888921a55d8Smrg	sq_conf.num_hs_gprs = 23;
889921a55d8Smrg	sq_conf.num_ls_gprs = 23;
890921a55d8Smrg	sq_conf.num_ps_threads = 96;
891921a55d8Smrg	sq_conf.num_vs_threads = 16;
892921a55d8Smrg	sq_conf.num_gs_threads = 16;
893921a55d8Smrg	sq_conf.num_es_threads = 16;
894921a55d8Smrg	sq_conf.num_hs_threads = 16;
895921a55d8Smrg	sq_conf.num_ls_threads = 16;
896921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
897921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
898921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
899921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
900921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
901921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
902921a55d8Smrg	break;
903921a55d8Smrg    case CHIP_FAMILY_BARTS:
904921a55d8Smrg	sq_conf.num_ps_gprs = 93;
905921a55d8Smrg	sq_conf.num_vs_gprs = 46;
906921a55d8Smrg	sq_conf.num_temp_gprs = 4;
907921a55d8Smrg	sq_conf.num_gs_gprs = 31;
908921a55d8Smrg	sq_conf.num_es_gprs = 31;
909921a55d8Smrg	sq_conf.num_hs_gprs = 23;
910921a55d8Smrg	sq_conf.num_ls_gprs = 23;
911921a55d8Smrg	sq_conf.num_ps_threads = 128;
912921a55d8Smrg	sq_conf.num_vs_threads = 20;
913921a55d8Smrg	sq_conf.num_gs_threads = 20;
914921a55d8Smrg	sq_conf.num_es_threads = 20;
915921a55d8Smrg	sq_conf.num_hs_threads = 20;
916921a55d8Smrg	sq_conf.num_ls_threads = 20;
917921a55d8Smrg	sq_conf.num_ps_stack_entries = 85;
918921a55d8Smrg	sq_conf.num_vs_stack_entries = 85;
919921a55d8Smrg	sq_conf.num_gs_stack_entries = 85;
920921a55d8Smrg	sq_conf.num_es_stack_entries = 85;
921921a55d8Smrg	sq_conf.num_hs_stack_entries = 85;
922921a55d8Smrg	sq_conf.num_ls_stack_entries = 85;
923921a55d8Smrg	break;
924921a55d8Smrg    case CHIP_FAMILY_TURKS:
925921a55d8Smrg	sq_conf.num_ps_gprs = 93;
926921a55d8Smrg	sq_conf.num_vs_gprs = 46;
927921a55d8Smrg	sq_conf.num_temp_gprs = 4;
928921a55d8Smrg	sq_conf.num_gs_gprs = 31;
929921a55d8Smrg	sq_conf.num_es_gprs = 31;
930921a55d8Smrg	sq_conf.num_hs_gprs = 23;
931921a55d8Smrg	sq_conf.num_ls_gprs = 23;
932921a55d8Smrg	sq_conf.num_ps_threads = 128;
933921a55d8Smrg	sq_conf.num_vs_threads = 20;
934921a55d8Smrg	sq_conf.num_gs_threads = 20;
935921a55d8Smrg	sq_conf.num_es_threads = 20;
936921a55d8Smrg	sq_conf.num_hs_threads = 20;
937921a55d8Smrg	sq_conf.num_ls_threads = 20;
938921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
939921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
940921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
941921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
942921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
943921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
944921a55d8Smrg	break;
945921a55d8Smrg    case CHIP_FAMILY_CAICOS:
946921a55d8Smrg	sq_conf.num_ps_gprs = 93;
947921a55d8Smrg	sq_conf.num_vs_gprs = 46;
948921a55d8Smrg	sq_conf.num_temp_gprs = 4;
949921a55d8Smrg	sq_conf.num_gs_gprs = 31;
950921a55d8Smrg	sq_conf.num_es_gprs = 31;
951921a55d8Smrg	sq_conf.num_hs_gprs = 23;
952921a55d8Smrg	sq_conf.num_ls_gprs = 23;
953921a55d8Smrg	sq_conf.num_ps_threads = 128;
954921a55d8Smrg	sq_conf.num_vs_threads = 10;
955921a55d8Smrg	sq_conf.num_gs_threads = 10;
956921a55d8Smrg	sq_conf.num_es_threads = 10;
957921a55d8Smrg	sq_conf.num_hs_threads = 10;
958921a55d8Smrg	sq_conf.num_ls_threads = 10;
959921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
960921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
961921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
962921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
963921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
964921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
965921a55d8Smrg	break;
966921a55d8Smrg    }
967921a55d8Smrg
968921a55d8Smrg    evergreen_sq_setup(pScrn, &sq_conf);
969921a55d8Smrg
970921a55d8Smrg    BEGIN_BATCH(24);
971921a55d8Smrg    EREG(SQ_LDS_ALLOC_PS, 0);
972921a55d8Smrg    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
973921a55d8Smrg
974921a55d8Smrg    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
975921a55d8Smrg    E32(0);
976921a55d8Smrg    E32(0);
977921a55d8Smrg    E32(0);
978921a55d8Smrg    E32(0);
979921a55d8Smrg    E32(0);
980921a55d8Smrg    E32(0);
981921a55d8Smrg
982921a55d8Smrg    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
983921a55d8Smrg    E32(0);
984921a55d8Smrg    E32(0);
985921a55d8Smrg    E32(0);
986921a55d8Smrg    E32(0);
987921a55d8Smrg
988921a55d8Smrg    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
989921a55d8Smrg    E32(0);
990921a55d8Smrg    E32(0);
991921a55d8Smrg    END_BATCH();
992921a55d8Smrg
993921a55d8Smrg    /* DB */
994921a55d8Smrg    BEGIN_BATCH(3 + 2);
995921a55d8Smrg    EREG(DB_Z_INFO,                           0);
996921a55d8Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
997921a55d8Smrg    END_BATCH();
998921a55d8Smrg
999921a55d8Smrg    BEGIN_BATCH(3 + 2);
1000921a55d8Smrg    EREG(DB_STENCIL_INFO,                     0);
1001921a55d8Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1002921a55d8Smrg    END_BATCH();
1003921a55d8Smrg
1004921a55d8Smrg    BEGIN_BATCH(3 + 2);
1005921a55d8Smrg    EREG(DB_HTILE_DATA_BASE,                    0);
1006921a55d8Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1007921a55d8Smrg    END_BATCH();
1008921a55d8Smrg
1009921a55d8Smrg    BEGIN_BATCH(49);
1010921a55d8Smrg    EREG(DB_DEPTH_CONTROL,                    0);
1011921a55d8Smrg
1012921a55d8Smrg    PACK0(PA_SC_VPORT_ZMIN_0, 2);
1013921a55d8Smrg    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
1014921a55d8Smrg    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
1015921a55d8Smrg
1016921a55d8Smrg    PACK0(DB_RENDER_CONTROL, 5);
1017921a55d8Smrg    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
1018921a55d8Smrg    E32(0); // DB_COUNT_CONTROL
1019921a55d8Smrg    E32(0); // DB_DEPTH_VIEW
1020921a55d8Smrg    E32(0x2a); // DB_RENDER_OVERRIDE
1021921a55d8Smrg    E32(0); // DB_RENDER_OVERRIDE2
1022921a55d8Smrg
1023921a55d8Smrg    PACK0(DB_STENCIL_CLEAR, 2);
1024921a55d8Smrg    E32(0); // DB_STENCIL_CLEAR
1025921a55d8Smrg    E32(0); // DB_DEPTH_CLEAR
1026921a55d8Smrg
1027921a55d8Smrg    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
1028921a55d8Smrg					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
1029921a55d8Smrg					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
1030921a55d8Smrg					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
1031921a55d8Smrg
1032921a55d8Smrg    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
1033921a55d8Smrg			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1034921a55d8Smrg
1035921a55d8Smrg    // SX
1036921a55d8Smrg    EREG(SX_MISC,               0);
1037921a55d8Smrg
1038921a55d8Smrg    // CB
1039921a55d8Smrg    PACK0(SX_ALPHA_TEST_CONTROL, 5);
1040921a55d8Smrg    E32(0); // SX_ALPHA_TEST_CONTROL
1041921a55d8Smrg    E32(0x00000000); //CB_BLEND_RED
1042921a55d8Smrg    E32(0x00000000); //CB_BLEND_GREEN
1043921a55d8Smrg    E32(0x00000000); //CB_BLEND_BLUE
1044921a55d8Smrg    E32(0x00000000); //CB_BLEND_ALPHA
1045921a55d8Smrg
1046921a55d8Smrg    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
1047921a55d8Smrg
1048921a55d8Smrg    // SC
1049921a55d8Smrg    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1050921a55d8Smrg					       (0 << WINDOW_Y_OFFSET_shift)));
1051921a55d8Smrg    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1052921a55d8Smrg    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
1053921a55d8Smrg    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1054921a55d8Smrg    END_BATCH();
1055921a55d8Smrg
1056921a55d8Smrg    /* clip boolean is set to always visible -> doesn't matter */
1057921a55d8Smrg    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1058921a55d8Smrg	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1059921a55d8Smrg
1060921a55d8Smrg    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1061921a55d8Smrg	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1062921a55d8Smrg
1063b13dfe66Smrg    BEGIN_BATCH(57);
1064921a55d8Smrg    PACK0(PA_SC_MODE_CNTL_0, 2);
1065921a55d8Smrg    E32(0); // PA_SC_MODE_CNTL_0
1066921a55d8Smrg    E32(0); // PA_SC_MODE_CNTL_1
1067921a55d8Smrg
1068921a55d8Smrg    PACK0(PA_SC_LINE_CNTL, 16);
1069921a55d8Smrg    E32(0); // PA_SC_LINE_CNTL
1070921a55d8Smrg    E32(0); // PA_SC_AA_CONFIG
1071921a55d8Smrg    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1072921a55d8Smrg	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1073921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1074921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1075921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1076921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1077921a55d8Smrg    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1078921a55d8Smrg    E32(0);
1079921a55d8Smrg    E32(0);
1080921a55d8Smrg    E32(0);
1081921a55d8Smrg    E32(0);
1082921a55d8Smrg    E32(0);
1083921a55d8Smrg    E32(0);
1084921a55d8Smrg    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1085921a55d8Smrg    E32(0xFFFFFFFF); // PA_SC_AA_MASK
1086921a55d8Smrg
1087921a55d8Smrg    // CL
1088921a55d8Smrg    PACK0(PA_CL_CLIP_CNTL, 8);
1089921a55d8Smrg    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1090921a55d8Smrg    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1091921a55d8Smrg    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1092921a55d8Smrg    E32(0); // PA_CL_VS_OUT_CNTL
1093921a55d8Smrg    E32(0); // PA_CL_NANINF_CNTL
1094921a55d8Smrg    E32(0); // PA_SU_LINE_STIPPLE_CNTL
1095921a55d8Smrg    E32(0); // PA_SU_LINE_STIPPLE_SCALE
1096921a55d8Smrg    E32(0); // PA_SU_PRIM_FILTER_CNTL
1097921a55d8Smrg
1098921a55d8Smrg    // SU
1099921a55d8Smrg    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1100921a55d8Smrg    E32(0);
1101921a55d8Smrg    E32(0);
1102921a55d8Smrg    E32(0);
1103921a55d8Smrg    E32(0);
1104921a55d8Smrg    E32(0);
1105921a55d8Smrg    E32(0);
1106921a55d8Smrg
1107b13dfe66Smrg    /* src = semantic id 0; mask = semantic id 1 */
1108b13dfe66Smrg    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1109b13dfe66Smrg			   (1 << SEMANTIC_1_shift)));
1110b13dfe66Smrg    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1111b13dfe66Smrg    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1112b13dfe66Smrg    E32(((0    << SEMANTIC_shift)	|
1113b13dfe66Smrg	 (0x01 << DEFAULT_VAL_shift)));
1114b13dfe66Smrg    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1115b13dfe66Smrg    E32(((1    << SEMANTIC_shift)	|
1116b13dfe66Smrg	 (0x01 << DEFAULT_VAL_shift)));
1117b13dfe66Smrg
1118921a55d8Smrg    PACK0(SPI_INPUT_Z, 8);
1119921a55d8Smrg    E32(0); // SPI_INPUT_Z
1120921a55d8Smrg    E32(0); // SPI_FOG_CNTL
1121921a55d8Smrg    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1122921a55d8Smrg    E32(0); // SPI_PS_IN_CONTROL_2
1123921a55d8Smrg    E32(0);
1124921a55d8Smrg    E32(0);
1125921a55d8Smrg    E32(0);
1126921a55d8Smrg    E32(0);
1127921a55d8Smrg    END_BATCH();
1128921a55d8Smrg
1129921a55d8Smrg    // clear FS
1130921a55d8Smrg    fs_conf.bo = accel_state->shaders_bo;
1131921a55d8Smrg    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1132921a55d8Smrg
1133921a55d8Smrg    // VGT
1134921a55d8Smrg    BEGIN_BATCH(46);
1135921a55d8Smrg
1136921a55d8Smrg    PACK0(VGT_MAX_VTX_INDX, 4);
1137921a55d8Smrg    E32(0xffffff);
1138921a55d8Smrg    E32(0);
1139921a55d8Smrg    E32(0);
1140921a55d8Smrg    E32(0);
1141921a55d8Smrg
1142921a55d8Smrg    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1143921a55d8Smrg    E32(0);
1144921a55d8Smrg    E32(0);
1145921a55d8Smrg
1146921a55d8Smrg    PACK0(VGT_REUSE_OFF, 2);
1147921a55d8Smrg    E32(0);
1148921a55d8Smrg    E32(0);
1149921a55d8Smrg
1150921a55d8Smrg    PACK0(PA_SU_POINT_SIZE, 17);
1151921a55d8Smrg    E32(0); // PA_SU_POINT_SIZE
1152921a55d8Smrg    E32(0); // PA_SU_POINT_MINMAX
1153921a55d8Smrg    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1154921a55d8Smrg    E32(0); // PA_SC_LINE_STIPPLE
1155921a55d8Smrg    E32(0); // VGT_OUTPUT_PATH_CNTL
1156921a55d8Smrg    E32(0); // VGT_HOS_CNTL
1157921a55d8Smrg    E32(0);
1158921a55d8Smrg    E32(0);
1159921a55d8Smrg    E32(0);
1160921a55d8Smrg    E32(0);
1161921a55d8Smrg    E32(0);
1162921a55d8Smrg    E32(0);
1163921a55d8Smrg    E32(0);
1164921a55d8Smrg    E32(0);
1165921a55d8Smrg    E32(0);
1166921a55d8Smrg    E32(0);
1167921a55d8Smrg    E32(0); // VGT_GS_MODE
1168921a55d8Smrg
1169921a55d8Smrg    EREG(VGT_PRIMITIVEID_EN,                  0);
1170921a55d8Smrg    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1171921a55d8Smrg    EREG(VGT_SHADER_STAGES_EN,          0);
1172921a55d8Smrg
1173921a55d8Smrg    PACK0(VGT_STRMOUT_CONFIG, 2);
1174921a55d8Smrg    E32(0);
1175921a55d8Smrg    E32(0);
1176921a55d8Smrg    END_BATCH();
1177921a55d8Smrg}
1178921a55d8Smrg
1179921a55d8Smrg
1180921a55d8Smrg/*
1181921a55d8Smrg * Commands
1182921a55d8Smrg */
1183921a55d8Smrg
1184921a55d8Smrgvoid
1185921a55d8Smrgevergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1186921a55d8Smrg{
1187921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1188921a55d8Smrg
1189921a55d8Smrg    BEGIN_BATCH(10);
1190921a55d8Smrg    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1191921a55d8Smrg    PACK3(IT_INDEX_TYPE, 1);
1192b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1193b13dfe66Smrg    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1194b13dfe66Smrg#else
1195921a55d8Smrg    E32(draw_conf->index_type);
1196b13dfe66Smrg#endif
1197921a55d8Smrg    PACK3(IT_NUM_INSTANCES, 1);
1198921a55d8Smrg    E32(draw_conf->num_instances);
1199921a55d8Smrg    PACK3(IT_DRAW_INDEX_AUTO, 2);
1200921a55d8Smrg    E32(draw_conf->num_indices);
1201921a55d8Smrg    E32(draw_conf->vgt_draw_initiator);
1202921a55d8Smrg    END_BATCH();
1203921a55d8Smrg}
1204921a55d8Smrg
1205921a55d8Smrgvoid evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1206921a55d8Smrg{
1207921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1208921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1209921a55d8Smrg    draw_config_t   draw_conf;
1210921a55d8Smrg    vtx_resource_t  vtx_res;
1211921a55d8Smrg
1212921a55d8Smrg    if (accel_state->vbo.vb_start_op == -1)
1213921a55d8Smrg      return;
1214921a55d8Smrg
1215921a55d8Smrg    CLEAR (draw_conf);
1216921a55d8Smrg    CLEAR (vtx_res);
1217921a55d8Smrg
1218921a55d8Smrg    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1219921a55d8Smrg	radeon_ib_discard(pScrn);
1220921a55d8Smrg	radeon_cs_flush_indirect(pScrn);
1221921a55d8Smrg	return;
1222921a55d8Smrg    }
1223921a55d8Smrg
1224921a55d8Smrg    /* Vertex buffer setup */
1225921a55d8Smrg    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1226921a55d8Smrg    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
1227921a55d8Smrg    vtx_res.vtx_size_dw     = vtx_size / 4;
1228921a55d8Smrg    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1229921a55d8Smrg    vtx_res.vb_addr         = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
1230921a55d8Smrg    vtx_res.bo              = accel_state->vbo.vb_bo;
1231921a55d8Smrg    vtx_res.dst_sel_x       = SQ_SEL_X;
1232921a55d8Smrg    vtx_res.dst_sel_y       = SQ_SEL_Y;
1233921a55d8Smrg    vtx_res.dst_sel_z       = SQ_SEL_Z;
1234921a55d8Smrg    vtx_res.dst_sel_w       = SQ_SEL_W;
1235b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1236b13dfe66Smrg    vtx_res.endian          = SQ_ENDIAN_8IN32;
1237b13dfe66Smrg#endif
1238921a55d8Smrg    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1239921a55d8Smrg
1240921a55d8Smrg    /* Draw */
1241921a55d8Smrg    draw_conf.prim_type          = DI_PT_RECTLIST;
1242921a55d8Smrg    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1243921a55d8Smrg    draw_conf.num_instances      = 1;
1244921a55d8Smrg    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1245921a55d8Smrg    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1246921a55d8Smrg
1247921a55d8Smrg    evergreen_draw_auto(pScrn, &draw_conf);
1248921a55d8Smrg
1249921a55d8Smrg    /* sync dst surface */
1250921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1251921a55d8Smrg				  accel_state->dst_size, accel_state->dst_obj.offset,
1252921a55d8Smrg				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1253921a55d8Smrg
1254921a55d8Smrg    accel_state->vbo.vb_start_op = -1;
1255921a55d8Smrg    accel_state->cbuf.vb_start_op = -1;
1256921a55d8Smrg    accel_state->ib_reset_op = 0;
1257921a55d8Smrg
1258921a55d8Smrg}
1259921a55d8Smrg
1260921a55d8Smrg#endif
1261