evergreen_accel.c revision 921a55d8
1921a55d8Smrg/*
2921a55d8Smrg * Copyright 2010 Advanced Micro Devices, Inc.
3921a55d8Smrg *
4921a55d8Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5921a55d8Smrg * copy of this software and associated documentation files (the "Software"),
6921a55d8Smrg * to deal in the Software without restriction, including without limitation
7921a55d8Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8921a55d8Smrg * and/or sell copies of the Software, and to permit persons to whom the
9921a55d8Smrg * Software is furnished to do so, subject to the following conditions:
10921a55d8Smrg *
11921a55d8Smrg * The above copyright notice and this permission notice (including the next
12921a55d8Smrg * paragraph) shall be included in all copies or substantial portions of the
13921a55d8Smrg * Software.
14921a55d8Smrg *
15921a55d8Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16921a55d8Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17921a55d8Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18921a55d8Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19921a55d8Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20921a55d8Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21921a55d8Smrg * SOFTWARE.
22921a55d8Smrg *
23921a55d8Smrg * Authors: Alex Deucher <alexander.deucher@amd.com>
24921a55d8Smrg *
25921a55d8Smrg */
26921a55d8Smrg#ifdef HAVE_CONFIG_H
27921a55d8Smrg#include "config.h"
28921a55d8Smrg#endif
29921a55d8Smrg
30921a55d8Smrg#ifdef XF86DRM_MODE
31921a55d8Smrg
32921a55d8Smrg#include "xf86.h"
33921a55d8Smrg
34921a55d8Smrg#include <errno.h>
35921a55d8Smrg
36921a55d8Smrg#include "radeon.h"
37921a55d8Smrg#include "evergreen_shader.h"
38921a55d8Smrg#include "radeon_reg.h"
39921a55d8Smrg#include "evergreen_reg.h"
40921a55d8Smrg#include "evergreen_state.h"
41921a55d8Smrg
42921a55d8Smrg#include "radeon_drm.h"
43921a55d8Smrg#include "radeon_vbo.h"
44921a55d8Smrg#include "radeon_exa_shared.h"
45921a55d8Smrg
46921a55d8Smrgvoid
47921a55d8Smrgevergreen_start_3d(ScrnInfoPtr pScrn)
48921a55d8Smrg{
49921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
50921a55d8Smrg
51921a55d8Smrg    BEGIN_BATCH(3);
52921a55d8Smrg    PACK3(IT_CONTEXT_CONTROL, 2);
53921a55d8Smrg    E32(0x80000000);
54921a55d8Smrg    E32(0x80000000);
55921a55d8Smrg    END_BATCH();
56921a55d8Smrg
57921a55d8Smrg}
58921a55d8Smrg
59921a55d8Smrg/*
60921a55d8Smrg * Setup of functional groups
61921a55d8Smrg */
62921a55d8Smrg
63921a55d8Smrg// asic stack/thread/gpr limits - need to query the drm
64921a55d8Smrgstatic void
65921a55d8Smrgevergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
66921a55d8Smrg{
67921a55d8Smrg    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
68921a55d8Smrg    uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
69921a55d8Smrg    uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
70921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
71921a55d8Smrg
72921a55d8Smrg    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
73921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_PALM) ||
74921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_CAICOS))
75921a55d8Smrg	sq_config = 0;
76921a55d8Smrg    else
77921a55d8Smrg	sq_config = VC_ENABLE_bit;
78921a55d8Smrg
79921a55d8Smrg    sq_config |= (EXPORT_SRC_C_bit |
80921a55d8Smrg		  (sq_conf->cs_prio << CS_PRIO_shift) |
81921a55d8Smrg		  (sq_conf->ls_prio << LS_PRIO_shift) |
82921a55d8Smrg		  (sq_conf->hs_prio << HS_PRIO_shift) |
83921a55d8Smrg		  (sq_conf->ps_prio << PS_PRIO_shift) |
84921a55d8Smrg		  (sq_conf->vs_prio << VS_PRIO_shift) |
85921a55d8Smrg		  (sq_conf->gs_prio << GS_PRIO_shift) |
86921a55d8Smrg		  (sq_conf->es_prio << ES_PRIO_shift));
87921a55d8Smrg
88921a55d8Smrg    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
89921a55d8Smrg			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
90921a55d8Smrg			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
91921a55d8Smrg    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
92921a55d8Smrg			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
93921a55d8Smrg    sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
94921a55d8Smrg			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
95921a55d8Smrg
96921a55d8Smrg    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
97921a55d8Smrg			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
98921a55d8Smrg			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
99921a55d8Smrg			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
100921a55d8Smrg    sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
101921a55d8Smrg				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
102921a55d8Smrg
103921a55d8Smrg    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
104921a55d8Smrg				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
105921a55d8Smrg
106921a55d8Smrg    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
107921a55d8Smrg				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
108921a55d8Smrg
109921a55d8Smrg    sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
110921a55d8Smrg				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
111921a55d8Smrg
112921a55d8Smrg    BEGIN_BATCH(16);
113921a55d8Smrg    /* disable dyn gprs */
114921a55d8Smrg    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
115921a55d8Smrg    PACK0(SQ_CONFIG, 4);
116921a55d8Smrg    E32(sq_config);
117921a55d8Smrg    E32(sq_gpr_resource_mgmt_1);
118921a55d8Smrg    E32(sq_gpr_resource_mgmt_2);
119921a55d8Smrg    E32(sq_gpr_resource_mgmt_3);
120921a55d8Smrg    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
121921a55d8Smrg    E32(sq_thread_resource_mgmt);
122921a55d8Smrg    E32(sq_thread_resource_mgmt_2);
123921a55d8Smrg    E32(sq_stack_resource_mgmt_1);
124921a55d8Smrg    E32(sq_stack_resource_mgmt_2);
125921a55d8Smrg    E32(sq_stack_resource_mgmt_3);
126921a55d8Smrg    END_BATCH();
127921a55d8Smrg}
128921a55d8Smrg
129921a55d8Smrgvoid
130921a55d8Smrgevergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
131921a55d8Smrg{
132921a55d8Smrg    uint32_t cb_color_info, cb_color_attrib, cb_color_dim;
133921a55d8Smrg    int pitch, slice, h;
134921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
135921a55d8Smrg
136921a55d8Smrg    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
137921a55d8Smrg		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
138921a55d8Smrg		     (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
139921a55d8Smrg		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
140921a55d8Smrg		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
141921a55d8Smrg		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
142921a55d8Smrg		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
143921a55d8Smrg    if (cb_conf->blend_clamp)
144921a55d8Smrg	cb_color_info |= BLEND_CLAMP_bit;
145921a55d8Smrg    if (cb_conf->fast_clear)
146921a55d8Smrg	cb_color_info |= FAST_CLEAR_bit;
147921a55d8Smrg    if (cb_conf->compression)
148921a55d8Smrg	cb_color_info |= COMPRESSION_bit;
149921a55d8Smrg    if (cb_conf->blend_bypass)
150921a55d8Smrg	cb_color_info |= BLEND_BYPASS_bit;
151921a55d8Smrg    if (cb_conf->simple_float)
152921a55d8Smrg	cb_color_info |= SIMPLE_FLOAT_bit;
153921a55d8Smrg    if (cb_conf->round_mode)
154921a55d8Smrg	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
155921a55d8Smrg    if (cb_conf->tile_compact)
156921a55d8Smrg	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
157921a55d8Smrg    if (cb_conf->rat)
158921a55d8Smrg	cb_color_info |= RAT_bit;
159921a55d8Smrg
160921a55d8Smrg    /* bit 4 needs to be set for linear and depth/stencil surfaces */
161921a55d8Smrg    cb_color_attrib = CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
162921a55d8Smrg
163921a55d8Smrg    pitch = (cb_conf->w / 8) - 1;
164921a55d8Smrg    h = RADEON_ALIGN(cb_conf->h, 8);
165921a55d8Smrg    slice = ((cb_conf->w * h) / 64) - 1;
166921a55d8Smrg
167921a55d8Smrg    switch (cb_conf->resource_type) {
168921a55d8Smrg    case BUFFER:
169921a55d8Smrg	/* number of elements in the surface */
170921a55d8Smrg	cb_color_dim = pitch * slice;
171921a55d8Smrg	break;
172921a55d8Smrg    default:
173921a55d8Smrg	/* w/h of the surface */
174921a55d8Smrg	cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) |
175921a55d8Smrg			((cb_conf->h - 1) << HEIGHT_MAX_shift));
176921a55d8Smrg	break;
177921a55d8Smrg    }
178921a55d8Smrg
179921a55d8Smrg    BEGIN_BATCH(3 + 2);
180921a55d8Smrg    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
181921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
182921a55d8Smrg    END_BATCH();
183921a55d8Smrg
184921a55d8Smrg    /* Set CMASK & FMASK buffer to the offset of color buffer as
185921a55d8Smrg     * we don't use those this shouldn't cause any issue and we
186921a55d8Smrg     * then have a valid cmd stream
187921a55d8Smrg     */
188921a55d8Smrg    BEGIN_BATCH(3 + 2);
189921a55d8Smrg    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
190921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
191921a55d8Smrg    END_BATCH();
192921a55d8Smrg    BEGIN_BATCH(3 + 2);
193921a55d8Smrg    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
194921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
195921a55d8Smrg    END_BATCH();
196921a55d8Smrg
197921a55d8Smrg    /* tiling config */
198921a55d8Smrg    BEGIN_BATCH(3 + 2);
199921a55d8Smrg    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
200921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
201921a55d8Smrg    END_BATCH();
202921a55d8Smrg    BEGIN_BATCH(3 + 2);
203921a55d8Smrg    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
204921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
205921a55d8Smrg    END_BATCH();
206921a55d8Smrg
207921a55d8Smrg    BEGIN_BATCH(24);
208921a55d8Smrg    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
209921a55d8Smrg    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
210921a55d8Smrg    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
211921a55d8Smrg    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
212921a55d8Smrg    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
213921a55d8Smrg    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
214921a55d8Smrg    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
215921a55d8Smrg    E32(0);
216921a55d8Smrg    E32(0);
217921a55d8Smrg    E32(0);
218921a55d8Smrg    E32(0);
219921a55d8Smrg    END_BATCH();
220921a55d8Smrg}
221921a55d8Smrg
222921a55d8Smrgstatic void
223921a55d8Smrgevergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
224921a55d8Smrg			      uint32_t size, uint64_t mc_addr,
225921a55d8Smrg			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
226921a55d8Smrg{
227921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
228921a55d8Smrg    uint32_t cp_coher_size;
229921a55d8Smrg    if (size == 0xffffffff)
230921a55d8Smrg	cp_coher_size = 0xffffffff;
231921a55d8Smrg    else
232921a55d8Smrg	cp_coher_size = ((size + 255) >> 8);
233921a55d8Smrg
234921a55d8Smrg    BEGIN_BATCH(5 + 2);
235921a55d8Smrg    PACK3(IT_SURFACE_SYNC, 4);
236921a55d8Smrg    E32(sync_type);
237921a55d8Smrg    E32(cp_coher_size);
238921a55d8Smrg    E32((mc_addr >> 8));
239921a55d8Smrg    E32(10); /* poll interval */
240921a55d8Smrg    RELOC_BATCH(bo, rdomains, wdomain);
241921a55d8Smrg    END_BATCH();
242921a55d8Smrg}
243921a55d8Smrg
244921a55d8Smrg/* inserts a wait for vline in the command stream */
245921a55d8Smrgvoid evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
246921a55d8Smrg				  xf86CrtcPtr crtc, int start, int stop)
247921a55d8Smrg{
248921a55d8Smrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
249921a55d8Smrg    drmmode_crtc_private_ptr drmmode_crtc;
250921a55d8Smrg    uint32_t offset;
251921a55d8Smrg
252921a55d8Smrg    if (!crtc)
253921a55d8Smrg        return;
254921a55d8Smrg
255921a55d8Smrg    drmmode_crtc = crtc->driver_private;
256921a55d8Smrg
257921a55d8Smrg    if (stop < start)
258921a55d8Smrg        return;
259921a55d8Smrg
260921a55d8Smrg    if (!crtc->enabled)
261921a55d8Smrg        return;
262921a55d8Smrg
263921a55d8Smrg    if (info->cs) {
264921a55d8Smrg        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
265921a55d8Smrg	    return;
266921a55d8Smrg    } else {
267921a55d8Smrg#ifdef USE_EXA
268921a55d8Smrg	if (info->useEXA)
269921a55d8Smrg	    offset = exaGetPixmapOffset(pPix);
270921a55d8Smrg	else
271921a55d8Smrg#endif
272921a55d8Smrg	    offset = pPix->devPrivate.ptr - info->FB;
273921a55d8Smrg
274921a55d8Smrg	/* if drawing to front buffer */
275921a55d8Smrg	if (offset != 0)
276921a55d8Smrg	    return;
277921a55d8Smrg    }
278921a55d8Smrg
279921a55d8Smrg    start = max(start, 0);
280921a55d8Smrg    stop = min(stop, crtc->mode.VDisplay);
281921a55d8Smrg
282921a55d8Smrg    if (start > crtc->mode.VDisplay)
283921a55d8Smrg        return;
284921a55d8Smrg
285921a55d8Smrg    BEGIN_BATCH(11);
286921a55d8Smrg    /* set the VLINE range */
287921a55d8Smrg    EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
288921a55d8Smrg	 (start << EVERGREEN_VLINE_START_SHIFT) |
289921a55d8Smrg	 (stop << EVERGREEN_VLINE_END_SHIFT));
290921a55d8Smrg
291921a55d8Smrg    /* tell the CP to poll the VLINE state register */
292921a55d8Smrg    PACK3(IT_WAIT_REG_MEM, 6);
293921a55d8Smrg    E32(IT_WAIT_REG | IT_WAIT_EQ);
294921a55d8Smrg    E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
295921a55d8Smrg    E32(0);
296921a55d8Smrg    E32(0);                          // Ref value
297921a55d8Smrg    E32(EVERGREEN_VLINE_STAT);    // Mask
298921a55d8Smrg    E32(10);                         // Wait interval
299921a55d8Smrg    /* add crtc reloc */
300921a55d8Smrg    PACK3(IT_NOP, 1);
301921a55d8Smrg    E32(drmmode_crtc->mode_crtc->crtc_id);
302921a55d8Smrg    END_BATCH();
303921a55d8Smrg}
304921a55d8Smrg
305921a55d8Smrgvoid
306921a55d8Smrgevergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
307921a55d8Smrg{
308921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
309921a55d8Smrg    uint32_t sq_pgm_resources;
310921a55d8Smrg
311921a55d8Smrg    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
312921a55d8Smrg			(fs_conf->stack_size << STACK_SIZE_shift));
313921a55d8Smrg
314921a55d8Smrg    if (fs_conf->dx10_clamp)
315921a55d8Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
316921a55d8Smrg
317921a55d8Smrg    BEGIN_BATCH(3 + 2);
318921a55d8Smrg    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
319921a55d8Smrg    RELOC_BATCH(fs_conf->bo, domain, 0);
320921a55d8Smrg    END_BATCH();
321921a55d8Smrg
322921a55d8Smrg    BEGIN_BATCH(3);
323921a55d8Smrg    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
324921a55d8Smrg    END_BATCH();
325921a55d8Smrg}
326921a55d8Smrg
327921a55d8Smrgvoid
328921a55d8Smrgevergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
329921a55d8Smrg{
330921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
331921a55d8Smrg    uint32_t sq_pgm_resources, sq_pgm_resources_2;
332921a55d8Smrg
333921a55d8Smrg    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
334921a55d8Smrg			(vs_conf->stack_size << STACK_SIZE_shift));
335921a55d8Smrg
336921a55d8Smrg    if (vs_conf->dx10_clamp)
337921a55d8Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
338921a55d8Smrg    if (vs_conf->uncached_first_inst)
339921a55d8Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
340921a55d8Smrg
341921a55d8Smrg    sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
342921a55d8Smrg			  (vs_conf->double_round << DOUBLE_ROUND_shift));
343921a55d8Smrg
344921a55d8Smrg    if (vs_conf->allow_sdi)
345921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
346921a55d8Smrg    if (vs_conf->allow_sd0)
347921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
348921a55d8Smrg    if (vs_conf->allow_ddi)
349921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
350921a55d8Smrg    if (vs_conf->allow_ddo)
351921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
352921a55d8Smrg
353921a55d8Smrg    /* flush SQ cache */
354921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
355921a55d8Smrg				  vs_conf->shader_size, vs_conf->shader_addr,
356921a55d8Smrg				  vs_conf->bo, domain, 0);
357921a55d8Smrg
358921a55d8Smrg    BEGIN_BATCH(3 + 2);
359921a55d8Smrg    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
360921a55d8Smrg    RELOC_BATCH(vs_conf->bo, domain, 0);
361921a55d8Smrg    END_BATCH();
362921a55d8Smrg
363921a55d8Smrg    BEGIN_BATCH(4);
364921a55d8Smrg    PACK0(SQ_PGM_RESOURCES_VS, 2);
365921a55d8Smrg    E32(sq_pgm_resources);
366921a55d8Smrg    E32(sq_pgm_resources_2);
367921a55d8Smrg    END_BATCH();
368921a55d8Smrg}
369921a55d8Smrg
370921a55d8Smrgvoid
371921a55d8Smrgevergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
372921a55d8Smrg{
373921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
374921a55d8Smrg    uint32_t sq_pgm_resources, sq_pgm_resources_2;
375921a55d8Smrg
376921a55d8Smrg    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
377921a55d8Smrg			(ps_conf->stack_size << STACK_SIZE_shift));
378921a55d8Smrg
379921a55d8Smrg    if (ps_conf->dx10_clamp)
380921a55d8Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
381921a55d8Smrg    if (ps_conf->uncached_first_inst)
382921a55d8Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
383921a55d8Smrg    if (ps_conf->clamp_consts)
384921a55d8Smrg	sq_pgm_resources |= CLAMP_CONSTS_bit;
385921a55d8Smrg
386921a55d8Smrg    sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
387921a55d8Smrg			  (ps_conf->double_round << DOUBLE_ROUND_shift));
388921a55d8Smrg
389921a55d8Smrg    if (ps_conf->allow_sdi)
390921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
391921a55d8Smrg    if (ps_conf->allow_sd0)
392921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
393921a55d8Smrg    if (ps_conf->allow_ddi)
394921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
395921a55d8Smrg    if (ps_conf->allow_ddo)
396921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
397921a55d8Smrg
398921a55d8Smrg    /* flush SQ cache */
399921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
400921a55d8Smrg				  ps_conf->shader_size, ps_conf->shader_addr,
401921a55d8Smrg				  ps_conf->bo, domain, 0);
402921a55d8Smrg
403921a55d8Smrg    BEGIN_BATCH(3 + 2);
404921a55d8Smrg    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
405921a55d8Smrg    RELOC_BATCH(ps_conf->bo, domain, 0);
406921a55d8Smrg    END_BATCH();
407921a55d8Smrg
408921a55d8Smrg    BEGIN_BATCH(5);
409921a55d8Smrg    PACK0(SQ_PGM_RESOURCES_PS, 3);
410921a55d8Smrg    E32(sq_pgm_resources);
411921a55d8Smrg    E32(sq_pgm_resources_2);
412921a55d8Smrg    E32(ps_conf->export_mode);
413921a55d8Smrg    END_BATCH();
414921a55d8Smrg}
415921a55d8Smrg
416921a55d8Smrgvoid
417921a55d8Smrgevergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
418921a55d8Smrg{
419921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
420921a55d8Smrg    /* size reg is units of 16 consts (4 dwords each) */
421921a55d8Smrg    uint32_t size = const_conf->size_bytes >> 8;
422921a55d8Smrg
423921a55d8Smrg    if (size == 0)
424921a55d8Smrg	size = 1;
425921a55d8Smrg
426921a55d8Smrg    /* flush SQ cache */
427921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
428921a55d8Smrg				  const_conf->size_bytes, const_conf->const_addr,
429921a55d8Smrg				  const_conf->bo, domain, 0);
430921a55d8Smrg
431921a55d8Smrg    switch (const_conf->type) {
432921a55d8Smrg    case SHADER_TYPE_VS:
433921a55d8Smrg	BEGIN_BATCH(3);
434921a55d8Smrg	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
435921a55d8Smrg	END_BATCH();
436921a55d8Smrg	BEGIN_BATCH(3 + 2);
437921a55d8Smrg	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
438921a55d8Smrg	RELOC_BATCH(const_conf->bo, domain, 0);
439921a55d8Smrg	END_BATCH();
440921a55d8Smrg	break;
441921a55d8Smrg    case SHADER_TYPE_PS:
442921a55d8Smrg	BEGIN_BATCH(3);
443921a55d8Smrg	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
444921a55d8Smrg	END_BATCH();
445921a55d8Smrg	BEGIN_BATCH(3 + 2);
446921a55d8Smrg	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
447921a55d8Smrg	RELOC_BATCH(const_conf->bo, domain, 0);
448921a55d8Smrg	END_BATCH();
449921a55d8Smrg	break;
450921a55d8Smrg    default:
451921a55d8Smrg	ErrorF("Unsupported const type %d\n", const_conf->type);
452921a55d8Smrg	break;
453921a55d8Smrg    }
454921a55d8Smrg
455921a55d8Smrg}
456921a55d8Smrg
457921a55d8Smrgvoid
458921a55d8Smrgevergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
459921a55d8Smrg{
460921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
461921a55d8Smrg    /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
462921a55d8Smrg     * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
463921a55d8Smrg     */
464921a55d8Smrg    BEGIN_BATCH(3);
465921a55d8Smrg    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
466921a55d8Smrg    END_BATCH();
467921a55d8Smrg}
468921a55d8Smrg
469921a55d8Smrgstatic void
470921a55d8Smrgevergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
471921a55d8Smrg{
472921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
473921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
474921a55d8Smrg    uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
475921a55d8Smrg
476921a55d8Smrg    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
477921a55d8Smrg			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
478921a55d8Smrg			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
479921a55d8Smrg			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
480921a55d8Smrg			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
481921a55d8Smrg    if (res->clamp_x)
482921a55d8Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
483921a55d8Smrg
484921a55d8Smrg    if (res->format_comp_all)
485921a55d8Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
486921a55d8Smrg
487921a55d8Smrg    if (res->srf_mode_all)
488921a55d8Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
489921a55d8Smrg
490921a55d8Smrg    sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
491921a55d8Smrg			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
492921a55d8Smrg			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
493921a55d8Smrg			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
494921a55d8Smrg
495921a55d8Smrg    if (res->uncached)
496921a55d8Smrg	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
497921a55d8Smrg
498921a55d8Smrg    /* XXX ??? */
499921a55d8Smrg    sq_vtx_constant_word4 = 0;
500921a55d8Smrg
501921a55d8Smrg    /* flush vertex cache */
502921a55d8Smrg    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
503921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_PALM) ||
504921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_CAICOS))
505921a55d8Smrg	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
506921a55d8Smrg				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
507921a55d8Smrg				      res->bo,
508921a55d8Smrg				      domain, 0);
509921a55d8Smrg    else
510921a55d8Smrg	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
511921a55d8Smrg				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
512921a55d8Smrg				      res->bo,
513921a55d8Smrg				      domain, 0);
514921a55d8Smrg
515921a55d8Smrg    BEGIN_BATCH(10 + 2);
516921a55d8Smrg    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
517921a55d8Smrg    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
518921a55d8Smrg    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
519921a55d8Smrg    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
520921a55d8Smrg    E32(sq_vtx_constant_word3);		// 3: swizzles
521921a55d8Smrg    E32(sq_vtx_constant_word4);		// 4: num elements
522921a55d8Smrg    E32(0);							// 5: n/a
523921a55d8Smrg    E32(0);							// 6: n/a
524921a55d8Smrg    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
525921a55d8Smrg    RELOC_BATCH(res->bo, domain, 0);
526921a55d8Smrg    END_BATCH();
527921a55d8Smrg}
528921a55d8Smrg
529921a55d8Smrgvoid
530921a55d8Smrgevergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
531921a55d8Smrg{
532921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
533921a55d8Smrg    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
534921a55d8Smrg    uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
535921a55d8Smrg
536921a55d8Smrg    sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
537921a55d8Smrg
538921a55d8Smrg    if (tex_res->w)
539921a55d8Smrg	sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
540921a55d8Smrg				  ((tex_res->w - 1) << TEX_WIDTH_shift));
541921a55d8Smrg
542921a55d8Smrg    if (tex_res->tile_type)
543921a55d8Smrg	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
544921a55d8Smrg
545921a55d8Smrg    sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
546921a55d8Smrg
547921a55d8Smrg    if (tex_res->h)
548921a55d8Smrg	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
549921a55d8Smrg    if (tex_res->depth)
550921a55d8Smrg	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
551921a55d8Smrg
552921a55d8Smrg    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
553921a55d8Smrg			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
554921a55d8Smrg			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
555921a55d8Smrg			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
556921a55d8Smrg			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
557921a55d8Smrg			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
558921a55d8Smrg			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
559921a55d8Smrg			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
560921a55d8Smrg			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
561921a55d8Smrg			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
562921a55d8Smrg			     (tex_res->base_level << BASE_LEVEL_shift));
563921a55d8Smrg
564921a55d8Smrg    if (tex_res->srf_mode_all)
565921a55d8Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
566921a55d8Smrg    if (tex_res->force_degamma)
567921a55d8Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
568921a55d8Smrg
569921a55d8Smrg    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
570921a55d8Smrg			     (tex_res->base_array << BASE_ARRAY_shift) |
571921a55d8Smrg			     (tex_res->last_array << LAST_ARRAY_shift));
572921a55d8Smrg
573921a55d8Smrg    sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
574921a55d8Smrg			     (tex_res->perf_modulation << PERF_MODULATION_shift));
575921a55d8Smrg
576921a55d8Smrg    if (tex_res->interlaced)
577921a55d8Smrg	sq_tex_resource_word6 |= INTERLACED_bit;
578921a55d8Smrg
579921a55d8Smrg    sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
580921a55d8Smrg			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
581921a55d8Smrg
582921a55d8Smrg    /* flush texture cache */
583921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
584921a55d8Smrg				  tex_res->size, tex_res->base,
585921a55d8Smrg				  tex_res->bo, domain, 0);
586921a55d8Smrg
587921a55d8Smrg    BEGIN_BATCH(10 + 4);
588921a55d8Smrg    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
589921a55d8Smrg    E32(sq_tex_resource_word0);
590921a55d8Smrg    E32(sq_tex_resource_word1);
591921a55d8Smrg    E32(((tex_res->base) >> 8));
592921a55d8Smrg    E32(((tex_res->mip_base) >> 8));
593921a55d8Smrg    E32(sq_tex_resource_word4);
594921a55d8Smrg    E32(sq_tex_resource_word5);
595921a55d8Smrg    E32(sq_tex_resource_word6);
596921a55d8Smrg    E32(sq_tex_resource_word7);
597921a55d8Smrg    RELOC_BATCH(tex_res->bo, domain, 0);
598921a55d8Smrg    RELOC_BATCH(tex_res->mip_bo, domain, 0);
599921a55d8Smrg    END_BATCH();
600921a55d8Smrg}
601921a55d8Smrg
602921a55d8Smrgvoid
603921a55d8Smrgevergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
604921a55d8Smrg{
605921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
606921a55d8Smrg    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
607921a55d8Smrg
608921a55d8Smrg    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
609921a55d8Smrg			    (s->clamp_y       << CLAMP_Y_shift)					|
610921a55d8Smrg			    (s->clamp_z       << CLAMP_Z_shift)					|
611921a55d8Smrg			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
612921a55d8Smrg			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
613921a55d8Smrg			    (s->z_filter      << Z_FILTER_shift)	|
614921a55d8Smrg			    (s->mip_filter    << MIP_FILTER_shift)				|
615921a55d8Smrg			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
616921a55d8Smrg			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
617921a55d8Smrg			    (s->chroma_key    << CHROMA_KEY_shift));
618921a55d8Smrg
619921a55d8Smrg    sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
620921a55d8Smrg			    (s->max_lod       << MAX_LOD_shift)					|
621921a55d8Smrg			    (s->perf_mip      << PERF_MIP_shift)	|
622921a55d8Smrg			    (s->perf_z        << PERF_Z_shift));
623921a55d8Smrg
624921a55d8Smrg
625921a55d8Smrg    sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
626921a55d8Smrg			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
627921a55d8Smrg
628921a55d8Smrg    if (s->mc_coord_truncate)
629921a55d8Smrg	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
630921a55d8Smrg    if (s->force_degamma)
631921a55d8Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
632921a55d8Smrg    if (s->truncate_coord)
633921a55d8Smrg	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
634921a55d8Smrg    if (s->disable_cube_wrap)
635921a55d8Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
636921a55d8Smrg    if (s->type)
637921a55d8Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
638921a55d8Smrg
639921a55d8Smrg    BEGIN_BATCH(5);
640921a55d8Smrg    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
641921a55d8Smrg    E32(sq_tex_sampler_word0);
642921a55d8Smrg    E32(sq_tex_sampler_word1);
643921a55d8Smrg    E32(sq_tex_sampler_word2);
644921a55d8Smrg    END_BATCH();
645921a55d8Smrg}
646921a55d8Smrg
647921a55d8Smrg//XXX deal with clip offsets in clip setup
648921a55d8Smrgvoid
649921a55d8Smrgevergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
650921a55d8Smrg{
651921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
652921a55d8Smrg
653921a55d8Smrg    BEGIN_BATCH(4);
654921a55d8Smrg    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
655921a55d8Smrg    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
656921a55d8Smrg	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
657921a55d8Smrg    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
658921a55d8Smrg	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
659921a55d8Smrg    END_BATCH();
660921a55d8Smrg}
661921a55d8Smrg
662921a55d8Smrgvoid
663921a55d8Smrgevergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
664921a55d8Smrg{
665921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
666921a55d8Smrg
667921a55d8Smrg    BEGIN_BATCH(4);
668921a55d8Smrg    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
669921a55d8Smrg    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
670921a55d8Smrg	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
671921a55d8Smrg	 WINDOW_OFFSET_DISABLE_bit));
672921a55d8Smrg    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
673921a55d8Smrg	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
674921a55d8Smrg    END_BATCH();
675921a55d8Smrg}
676921a55d8Smrg
677921a55d8Smrgvoid
678921a55d8Smrgevergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
679921a55d8Smrg{
680921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
681921a55d8Smrg
682921a55d8Smrg    BEGIN_BATCH(4);
683921a55d8Smrg    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
684921a55d8Smrg    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
685921a55d8Smrg	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
686921a55d8Smrg	 WINDOW_OFFSET_DISABLE_bit));
687921a55d8Smrg    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
688921a55d8Smrg	 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
689921a55d8Smrg    END_BATCH();
690921a55d8Smrg}
691921a55d8Smrg
692921a55d8Smrgvoid
693921a55d8Smrgevergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
694921a55d8Smrg{
695921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
696921a55d8Smrg
697921a55d8Smrg    BEGIN_BATCH(4);
698921a55d8Smrg    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
699921a55d8Smrg    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
700921a55d8Smrg	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
701921a55d8Smrg	 WINDOW_OFFSET_DISABLE_bit));
702921a55d8Smrg    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
703921a55d8Smrg	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
704921a55d8Smrg    END_BATCH();
705921a55d8Smrg}
706921a55d8Smrg
707921a55d8Smrgvoid
708921a55d8Smrgevergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
709921a55d8Smrg{
710921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
711921a55d8Smrg
712921a55d8Smrg    BEGIN_BATCH(4);
713921a55d8Smrg    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
714921a55d8Smrg    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
715921a55d8Smrg	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
716921a55d8Smrg    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
717921a55d8Smrg	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
718921a55d8Smrg    END_BATCH();
719921a55d8Smrg}
720921a55d8Smrg
721921a55d8Smrg/*
722921a55d8Smrg * Setup of default state
723921a55d8Smrg */
724921a55d8Smrg
725921a55d8Smrgvoid
726921a55d8Smrgevergreen_set_default_state(ScrnInfoPtr pScrn)
727921a55d8Smrg{
728921a55d8Smrg    tex_resource_t tex_res;
729921a55d8Smrg    shader_config_t fs_conf;
730921a55d8Smrg    sq_config_t sq_conf;
731921a55d8Smrg    int i;
732921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
733921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
734921a55d8Smrg
735921a55d8Smrg    if (accel_state->XInited3D)
736921a55d8Smrg	return;
737921a55d8Smrg
738921a55d8Smrg    memset(&tex_res, 0, sizeof(tex_resource_t));
739921a55d8Smrg    memset(&fs_conf, 0, sizeof(shader_config_t));
740921a55d8Smrg
741921a55d8Smrg    accel_state->XInited3D = TRUE;
742921a55d8Smrg
743921a55d8Smrg    evergreen_start_3d(pScrn);
744921a55d8Smrg
745921a55d8Smrg    /* SQ */
746921a55d8Smrg    sq_conf.ps_prio = 0;
747921a55d8Smrg    sq_conf.vs_prio = 1;
748921a55d8Smrg    sq_conf.gs_prio = 2;
749921a55d8Smrg    sq_conf.es_prio = 3;
750921a55d8Smrg    sq_conf.hs_prio = 0;
751921a55d8Smrg    sq_conf.ls_prio = 0;
752921a55d8Smrg    sq_conf.cs_prio = 0;
753921a55d8Smrg
754921a55d8Smrg    switch (info->ChipFamily) {
755921a55d8Smrg    case CHIP_FAMILY_CEDAR:
756921a55d8Smrg    default:
757921a55d8Smrg	sq_conf.num_ps_gprs = 93;
758921a55d8Smrg	sq_conf.num_vs_gprs = 46;
759921a55d8Smrg	sq_conf.num_temp_gprs = 4;
760921a55d8Smrg	sq_conf.num_gs_gprs = 31;
761921a55d8Smrg	sq_conf.num_es_gprs = 31;
762921a55d8Smrg	sq_conf.num_hs_gprs = 23;
763921a55d8Smrg	sq_conf.num_ls_gprs = 23;
764921a55d8Smrg	sq_conf.num_ps_threads = 96;
765921a55d8Smrg	sq_conf.num_vs_threads = 16;
766921a55d8Smrg	sq_conf.num_gs_threads = 16;
767921a55d8Smrg	sq_conf.num_es_threads = 16;
768921a55d8Smrg	sq_conf.num_hs_threads = 16;
769921a55d8Smrg	sq_conf.num_ls_threads = 16;
770921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
771921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
772921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
773921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
774921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
775921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
776921a55d8Smrg	break;
777921a55d8Smrg    case CHIP_FAMILY_REDWOOD:
778921a55d8Smrg	sq_conf.num_ps_gprs = 93;
779921a55d8Smrg	sq_conf.num_vs_gprs = 46;
780921a55d8Smrg	sq_conf.num_temp_gprs = 4;
781921a55d8Smrg	sq_conf.num_gs_gprs = 31;
782921a55d8Smrg	sq_conf.num_es_gprs = 31;
783921a55d8Smrg	sq_conf.num_hs_gprs = 23;
784921a55d8Smrg	sq_conf.num_ls_gprs = 23;
785921a55d8Smrg	sq_conf.num_ps_threads = 128;
786921a55d8Smrg	sq_conf.num_vs_threads = 20;
787921a55d8Smrg	sq_conf.num_gs_threads = 20;
788921a55d8Smrg	sq_conf.num_es_threads = 20;
789921a55d8Smrg	sq_conf.num_hs_threads = 20;
790921a55d8Smrg	sq_conf.num_ls_threads = 20;
791921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
792921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
793921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
794921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
795921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
796921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
797921a55d8Smrg	break;
798921a55d8Smrg    case CHIP_FAMILY_JUNIPER:
799921a55d8Smrg	sq_conf.num_ps_gprs = 93;
800921a55d8Smrg	sq_conf.num_vs_gprs = 46;
801921a55d8Smrg	sq_conf.num_temp_gprs = 4;
802921a55d8Smrg	sq_conf.num_gs_gprs = 31;
803921a55d8Smrg	sq_conf.num_es_gprs = 31;
804921a55d8Smrg	sq_conf.num_hs_gprs = 23;
805921a55d8Smrg	sq_conf.num_ls_gprs = 23;
806921a55d8Smrg	sq_conf.num_ps_threads = 128;
807921a55d8Smrg	sq_conf.num_vs_threads = 20;
808921a55d8Smrg	sq_conf.num_gs_threads = 20;
809921a55d8Smrg	sq_conf.num_es_threads = 20;
810921a55d8Smrg	sq_conf.num_hs_threads = 20;
811921a55d8Smrg	sq_conf.num_ls_threads = 20;
812921a55d8Smrg	sq_conf.num_ps_stack_entries = 85;
813921a55d8Smrg	sq_conf.num_vs_stack_entries = 85;
814921a55d8Smrg	sq_conf.num_gs_stack_entries = 85;
815921a55d8Smrg	sq_conf.num_es_stack_entries = 85;
816921a55d8Smrg	sq_conf.num_hs_stack_entries = 85;
817921a55d8Smrg	sq_conf.num_ls_stack_entries = 85;
818921a55d8Smrg	break;
819921a55d8Smrg    case CHIP_FAMILY_CYPRESS:
820921a55d8Smrg    case CHIP_FAMILY_HEMLOCK:
821921a55d8Smrg	sq_conf.num_ps_gprs = 93;
822921a55d8Smrg	sq_conf.num_vs_gprs = 46;
823921a55d8Smrg	sq_conf.num_temp_gprs = 4;
824921a55d8Smrg	sq_conf.num_gs_gprs = 31;
825921a55d8Smrg	sq_conf.num_es_gprs = 31;
826921a55d8Smrg	sq_conf.num_hs_gprs = 23;
827921a55d8Smrg	sq_conf.num_ls_gprs = 23;
828921a55d8Smrg	sq_conf.num_ps_threads = 128;
829921a55d8Smrg	sq_conf.num_vs_threads = 20;
830921a55d8Smrg	sq_conf.num_gs_threads = 20;
831921a55d8Smrg	sq_conf.num_es_threads = 20;
832921a55d8Smrg	sq_conf.num_hs_threads = 20;
833921a55d8Smrg	sq_conf.num_ls_threads = 20;
834921a55d8Smrg	sq_conf.num_ps_stack_entries = 85;
835921a55d8Smrg	sq_conf.num_vs_stack_entries = 85;
836921a55d8Smrg	sq_conf.num_gs_stack_entries = 85;
837921a55d8Smrg	sq_conf.num_es_stack_entries = 85;
838921a55d8Smrg	sq_conf.num_hs_stack_entries = 85;
839921a55d8Smrg	sq_conf.num_ls_stack_entries = 85;
840921a55d8Smrg	break;
841921a55d8Smrg    case CHIP_FAMILY_PALM:
842921a55d8Smrg	sq_conf.num_ps_gprs = 93;
843921a55d8Smrg	sq_conf.num_vs_gprs = 46;
844921a55d8Smrg	sq_conf.num_temp_gprs = 4;
845921a55d8Smrg	sq_conf.num_gs_gprs = 31;
846921a55d8Smrg	sq_conf.num_es_gprs = 31;
847921a55d8Smrg	sq_conf.num_hs_gprs = 23;
848921a55d8Smrg	sq_conf.num_ls_gprs = 23;
849921a55d8Smrg	sq_conf.num_ps_threads = 96;
850921a55d8Smrg	sq_conf.num_vs_threads = 16;
851921a55d8Smrg	sq_conf.num_gs_threads = 16;
852921a55d8Smrg	sq_conf.num_es_threads = 16;
853921a55d8Smrg	sq_conf.num_hs_threads = 16;
854921a55d8Smrg	sq_conf.num_ls_threads = 16;
855921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
856921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
857921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
858921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
859921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
860921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
861921a55d8Smrg	break;
862921a55d8Smrg    case CHIP_FAMILY_BARTS:
863921a55d8Smrg	sq_conf.num_ps_gprs = 93;
864921a55d8Smrg	sq_conf.num_vs_gprs = 46;
865921a55d8Smrg	sq_conf.num_temp_gprs = 4;
866921a55d8Smrg	sq_conf.num_gs_gprs = 31;
867921a55d8Smrg	sq_conf.num_es_gprs = 31;
868921a55d8Smrg	sq_conf.num_hs_gprs = 23;
869921a55d8Smrg	sq_conf.num_ls_gprs = 23;
870921a55d8Smrg	sq_conf.num_ps_threads = 128;
871921a55d8Smrg	sq_conf.num_vs_threads = 20;
872921a55d8Smrg	sq_conf.num_gs_threads = 20;
873921a55d8Smrg	sq_conf.num_es_threads = 20;
874921a55d8Smrg	sq_conf.num_hs_threads = 20;
875921a55d8Smrg	sq_conf.num_ls_threads = 20;
876921a55d8Smrg	sq_conf.num_ps_stack_entries = 85;
877921a55d8Smrg	sq_conf.num_vs_stack_entries = 85;
878921a55d8Smrg	sq_conf.num_gs_stack_entries = 85;
879921a55d8Smrg	sq_conf.num_es_stack_entries = 85;
880921a55d8Smrg	sq_conf.num_hs_stack_entries = 85;
881921a55d8Smrg	sq_conf.num_ls_stack_entries = 85;
882921a55d8Smrg	break;
883921a55d8Smrg    case CHIP_FAMILY_TURKS:
884921a55d8Smrg	sq_conf.num_ps_gprs = 93;
885921a55d8Smrg	sq_conf.num_vs_gprs = 46;
886921a55d8Smrg	sq_conf.num_temp_gprs = 4;
887921a55d8Smrg	sq_conf.num_gs_gprs = 31;
888921a55d8Smrg	sq_conf.num_es_gprs = 31;
889921a55d8Smrg	sq_conf.num_hs_gprs = 23;
890921a55d8Smrg	sq_conf.num_ls_gprs = 23;
891921a55d8Smrg	sq_conf.num_ps_threads = 128;
892921a55d8Smrg	sq_conf.num_vs_threads = 20;
893921a55d8Smrg	sq_conf.num_gs_threads = 20;
894921a55d8Smrg	sq_conf.num_es_threads = 20;
895921a55d8Smrg	sq_conf.num_hs_threads = 20;
896921a55d8Smrg	sq_conf.num_ls_threads = 20;
897921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
898921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
899921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
900921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
901921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
902921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
903921a55d8Smrg	break;
904921a55d8Smrg    case CHIP_FAMILY_CAICOS:
905921a55d8Smrg	sq_conf.num_ps_gprs = 93;
906921a55d8Smrg	sq_conf.num_vs_gprs = 46;
907921a55d8Smrg	sq_conf.num_temp_gprs = 4;
908921a55d8Smrg	sq_conf.num_gs_gprs = 31;
909921a55d8Smrg	sq_conf.num_es_gprs = 31;
910921a55d8Smrg	sq_conf.num_hs_gprs = 23;
911921a55d8Smrg	sq_conf.num_ls_gprs = 23;
912921a55d8Smrg	sq_conf.num_ps_threads = 128;
913921a55d8Smrg	sq_conf.num_vs_threads = 10;
914921a55d8Smrg	sq_conf.num_gs_threads = 10;
915921a55d8Smrg	sq_conf.num_es_threads = 10;
916921a55d8Smrg	sq_conf.num_hs_threads = 10;
917921a55d8Smrg	sq_conf.num_ls_threads = 10;
918921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
919921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
920921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
921921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
922921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
923921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
924921a55d8Smrg	break;
925921a55d8Smrg    }
926921a55d8Smrg
927921a55d8Smrg    evergreen_sq_setup(pScrn, &sq_conf);
928921a55d8Smrg
929921a55d8Smrg    BEGIN_BATCH(24);
930921a55d8Smrg    EREG(SQ_LDS_ALLOC_PS, 0);
931921a55d8Smrg    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
932921a55d8Smrg
933921a55d8Smrg    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
934921a55d8Smrg    E32(0);
935921a55d8Smrg    E32(0);
936921a55d8Smrg    E32(0);
937921a55d8Smrg    E32(0);
938921a55d8Smrg    E32(0);
939921a55d8Smrg    E32(0);
940921a55d8Smrg
941921a55d8Smrg    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
942921a55d8Smrg    E32(0);
943921a55d8Smrg    E32(0);
944921a55d8Smrg    E32(0);
945921a55d8Smrg    E32(0);
946921a55d8Smrg
947921a55d8Smrg    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
948921a55d8Smrg    E32(0);
949921a55d8Smrg    E32(0);
950921a55d8Smrg    END_BATCH();
951921a55d8Smrg
952921a55d8Smrg    /* DB */
953921a55d8Smrg    BEGIN_BATCH(3 + 2);
954921a55d8Smrg    EREG(DB_Z_INFO,                           0);
955921a55d8Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
956921a55d8Smrg    END_BATCH();
957921a55d8Smrg
958921a55d8Smrg    BEGIN_BATCH(3 + 2);
959921a55d8Smrg    EREG(DB_STENCIL_INFO,                     0);
960921a55d8Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
961921a55d8Smrg    END_BATCH();
962921a55d8Smrg
963921a55d8Smrg    BEGIN_BATCH(3 + 2);
964921a55d8Smrg    EREG(DB_HTILE_DATA_BASE,                    0);
965921a55d8Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
966921a55d8Smrg    END_BATCH();
967921a55d8Smrg
968921a55d8Smrg    BEGIN_BATCH(49);
969921a55d8Smrg    EREG(DB_DEPTH_CONTROL,                    0);
970921a55d8Smrg
971921a55d8Smrg    PACK0(PA_SC_VPORT_ZMIN_0, 2);
972921a55d8Smrg    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
973921a55d8Smrg    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
974921a55d8Smrg
975921a55d8Smrg    PACK0(DB_RENDER_CONTROL, 5);
976921a55d8Smrg    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
977921a55d8Smrg    E32(0); // DB_COUNT_CONTROL
978921a55d8Smrg    E32(0); // DB_DEPTH_VIEW
979921a55d8Smrg    E32(0x2a); // DB_RENDER_OVERRIDE
980921a55d8Smrg    E32(0); // DB_RENDER_OVERRIDE2
981921a55d8Smrg
982921a55d8Smrg    PACK0(DB_STENCIL_CLEAR, 2);
983921a55d8Smrg    E32(0); // DB_STENCIL_CLEAR
984921a55d8Smrg    E32(0); // DB_DEPTH_CLEAR
985921a55d8Smrg
986921a55d8Smrg    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
987921a55d8Smrg					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
988921a55d8Smrg					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
989921a55d8Smrg					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
990921a55d8Smrg
991921a55d8Smrg    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
992921a55d8Smrg			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
993921a55d8Smrg
994921a55d8Smrg    // SX
995921a55d8Smrg    EREG(SX_MISC,               0);
996921a55d8Smrg
997921a55d8Smrg    // CB
998921a55d8Smrg    PACK0(SX_ALPHA_TEST_CONTROL, 5);
999921a55d8Smrg    E32(0); // SX_ALPHA_TEST_CONTROL
1000921a55d8Smrg    E32(0x00000000); //CB_BLEND_RED
1001921a55d8Smrg    E32(0x00000000); //CB_BLEND_GREEN
1002921a55d8Smrg    E32(0x00000000); //CB_BLEND_BLUE
1003921a55d8Smrg    E32(0x00000000); //CB_BLEND_ALPHA
1004921a55d8Smrg
1005921a55d8Smrg    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
1006921a55d8Smrg
1007921a55d8Smrg    // SC
1008921a55d8Smrg    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1009921a55d8Smrg					       (0 << WINDOW_Y_OFFSET_shift)));
1010921a55d8Smrg    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1011921a55d8Smrg    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
1012921a55d8Smrg    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1013921a55d8Smrg    END_BATCH();
1014921a55d8Smrg
1015921a55d8Smrg    /* clip boolean is set to always visible -> doesn't matter */
1016921a55d8Smrg    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1017921a55d8Smrg	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1018921a55d8Smrg
1019921a55d8Smrg    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1020921a55d8Smrg	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1021921a55d8Smrg
1022921a55d8Smrg    BEGIN_BATCH(50);
1023921a55d8Smrg    PACK0(PA_SC_MODE_CNTL_0, 2);
1024921a55d8Smrg    E32(0); // PA_SC_MODE_CNTL_0
1025921a55d8Smrg    E32(0); // PA_SC_MODE_CNTL_1
1026921a55d8Smrg
1027921a55d8Smrg    PACK0(PA_SC_LINE_CNTL, 16);
1028921a55d8Smrg    E32(0); // PA_SC_LINE_CNTL
1029921a55d8Smrg    E32(0); // PA_SC_AA_CONFIG
1030921a55d8Smrg    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1031921a55d8Smrg	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1032921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1033921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1034921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1035921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1036921a55d8Smrg    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1037921a55d8Smrg    E32(0);
1038921a55d8Smrg    E32(0);
1039921a55d8Smrg    E32(0);
1040921a55d8Smrg    E32(0);
1041921a55d8Smrg    E32(0);
1042921a55d8Smrg    E32(0);
1043921a55d8Smrg    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1044921a55d8Smrg    E32(0xFFFFFFFF); // PA_SC_AA_MASK
1045921a55d8Smrg
1046921a55d8Smrg    // CL
1047921a55d8Smrg    PACK0(PA_CL_CLIP_CNTL, 8);
1048921a55d8Smrg    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1049921a55d8Smrg    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1050921a55d8Smrg    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1051921a55d8Smrg    E32(0); // PA_CL_VS_OUT_CNTL
1052921a55d8Smrg    E32(0); // PA_CL_NANINF_CNTL
1053921a55d8Smrg    E32(0); // PA_SU_LINE_STIPPLE_CNTL
1054921a55d8Smrg    E32(0); // PA_SU_LINE_STIPPLE_SCALE
1055921a55d8Smrg    E32(0); // PA_SU_PRIM_FILTER_CNTL
1056921a55d8Smrg
1057921a55d8Smrg    // SU
1058921a55d8Smrg    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1059921a55d8Smrg    E32(0);
1060921a55d8Smrg    E32(0);
1061921a55d8Smrg    E32(0);
1062921a55d8Smrg    E32(0);
1063921a55d8Smrg    E32(0);
1064921a55d8Smrg    E32(0);
1065921a55d8Smrg
1066921a55d8Smrg    PACK0(SPI_INPUT_Z, 8);
1067921a55d8Smrg    E32(0); // SPI_INPUT_Z
1068921a55d8Smrg    E32(0); // SPI_FOG_CNTL
1069921a55d8Smrg    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1070921a55d8Smrg    E32(0); // SPI_PS_IN_CONTROL_2
1071921a55d8Smrg    E32(0);
1072921a55d8Smrg    E32(0);
1073921a55d8Smrg    E32(0);
1074921a55d8Smrg    E32(0);
1075921a55d8Smrg    END_BATCH();
1076921a55d8Smrg
1077921a55d8Smrg    // clear FS
1078921a55d8Smrg    fs_conf.bo = accel_state->shaders_bo;
1079921a55d8Smrg    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1080921a55d8Smrg
1081921a55d8Smrg    // VGT
1082921a55d8Smrg    BEGIN_BATCH(46);
1083921a55d8Smrg
1084921a55d8Smrg    PACK0(VGT_MAX_VTX_INDX, 4);
1085921a55d8Smrg    E32(0xffffff);
1086921a55d8Smrg    E32(0);
1087921a55d8Smrg    E32(0);
1088921a55d8Smrg    E32(0);
1089921a55d8Smrg
1090921a55d8Smrg    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1091921a55d8Smrg    E32(0);
1092921a55d8Smrg    E32(0);
1093921a55d8Smrg
1094921a55d8Smrg    PACK0(VGT_REUSE_OFF, 2);
1095921a55d8Smrg    E32(0);
1096921a55d8Smrg    E32(0);
1097921a55d8Smrg
1098921a55d8Smrg    PACK0(PA_SU_POINT_SIZE, 17);
1099921a55d8Smrg    E32(0); // PA_SU_POINT_SIZE
1100921a55d8Smrg    E32(0); // PA_SU_POINT_MINMAX
1101921a55d8Smrg    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1102921a55d8Smrg    E32(0); // PA_SC_LINE_STIPPLE
1103921a55d8Smrg    E32(0); // VGT_OUTPUT_PATH_CNTL
1104921a55d8Smrg    E32(0); // VGT_HOS_CNTL
1105921a55d8Smrg    E32(0);
1106921a55d8Smrg    E32(0);
1107921a55d8Smrg    E32(0);
1108921a55d8Smrg    E32(0);
1109921a55d8Smrg    E32(0);
1110921a55d8Smrg    E32(0);
1111921a55d8Smrg    E32(0);
1112921a55d8Smrg    E32(0);
1113921a55d8Smrg    E32(0);
1114921a55d8Smrg    E32(0);
1115921a55d8Smrg    E32(0); // VGT_GS_MODE
1116921a55d8Smrg
1117921a55d8Smrg    EREG(VGT_PRIMITIVEID_EN,                  0);
1118921a55d8Smrg    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1119921a55d8Smrg    EREG(VGT_SHADER_STAGES_EN,          0);
1120921a55d8Smrg
1121921a55d8Smrg    PACK0(VGT_STRMOUT_CONFIG, 2);
1122921a55d8Smrg    E32(0);
1123921a55d8Smrg    E32(0);
1124921a55d8Smrg    END_BATCH();
1125921a55d8Smrg}
1126921a55d8Smrg
1127921a55d8Smrg
1128921a55d8Smrg/*
1129921a55d8Smrg * Commands
1130921a55d8Smrg */
1131921a55d8Smrg
1132921a55d8Smrgvoid
1133921a55d8Smrgevergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1134921a55d8Smrg{
1135921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1136921a55d8Smrg
1137921a55d8Smrg    BEGIN_BATCH(10);
1138921a55d8Smrg    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1139921a55d8Smrg    PACK3(IT_INDEX_TYPE, 1);
1140921a55d8Smrg    E32(draw_conf->index_type);
1141921a55d8Smrg    PACK3(IT_NUM_INSTANCES, 1);
1142921a55d8Smrg    E32(draw_conf->num_instances);
1143921a55d8Smrg    PACK3(IT_DRAW_INDEX_AUTO, 2);
1144921a55d8Smrg    E32(draw_conf->num_indices);
1145921a55d8Smrg    E32(draw_conf->vgt_draw_initiator);
1146921a55d8Smrg    END_BATCH();
1147921a55d8Smrg}
1148921a55d8Smrg
1149921a55d8Smrgvoid evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1150921a55d8Smrg{
1151921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1152921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1153921a55d8Smrg    draw_config_t   draw_conf;
1154921a55d8Smrg    vtx_resource_t  vtx_res;
1155921a55d8Smrg
1156921a55d8Smrg    if (accel_state->vbo.vb_start_op == -1)
1157921a55d8Smrg      return;
1158921a55d8Smrg
1159921a55d8Smrg    CLEAR (draw_conf);
1160921a55d8Smrg    CLEAR (vtx_res);
1161921a55d8Smrg
1162921a55d8Smrg    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1163921a55d8Smrg	radeon_ib_discard(pScrn);
1164921a55d8Smrg	radeon_cs_flush_indirect(pScrn);
1165921a55d8Smrg	return;
1166921a55d8Smrg    }
1167921a55d8Smrg
1168921a55d8Smrg    /* Vertex buffer setup */
1169921a55d8Smrg    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1170921a55d8Smrg    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
1171921a55d8Smrg    vtx_res.vtx_size_dw     = vtx_size / 4;
1172921a55d8Smrg    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1173921a55d8Smrg    vtx_res.vb_addr         = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
1174921a55d8Smrg    vtx_res.bo              = accel_state->vbo.vb_bo;
1175921a55d8Smrg    vtx_res.dst_sel_x       = SQ_SEL_X;
1176921a55d8Smrg    vtx_res.dst_sel_y       = SQ_SEL_Y;
1177921a55d8Smrg    vtx_res.dst_sel_z       = SQ_SEL_Z;
1178921a55d8Smrg    vtx_res.dst_sel_w       = SQ_SEL_W;
1179921a55d8Smrg    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1180921a55d8Smrg
1181921a55d8Smrg    /* Draw */
1182921a55d8Smrg    draw_conf.prim_type          = DI_PT_RECTLIST;
1183921a55d8Smrg    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1184921a55d8Smrg    draw_conf.num_instances      = 1;
1185921a55d8Smrg    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1186921a55d8Smrg    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1187921a55d8Smrg
1188921a55d8Smrg    evergreen_draw_auto(pScrn, &draw_conf);
1189921a55d8Smrg
1190921a55d8Smrg    /* sync dst surface */
1191921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1192921a55d8Smrg				  accel_state->dst_size, accel_state->dst_obj.offset,
1193921a55d8Smrg				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1194921a55d8Smrg
1195921a55d8Smrg    accel_state->vbo.vb_start_op = -1;
1196921a55d8Smrg    accel_state->cbuf.vb_start_op = -1;
1197921a55d8Smrg    accel_state->ib_reset_op = 0;
1198921a55d8Smrg
1199921a55d8Smrg}
1200921a55d8Smrg
1201921a55d8Smrg#endif
1202