1de2362d3Smrg/*
2de2362d3Smrg * Copyright 2010 Advanced Micro Devices, Inc.
3de2362d3Smrg *
4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5de2362d3Smrg * copy of this software and associated documentation files (the "Software"),
6de2362d3Smrg * to deal in the Software without restriction, including without limitation
7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the
9de2362d3Smrg * Software is furnished to do so, subject to the following conditions:
10de2362d3Smrg *
11de2362d3Smrg * The above copyright notice and this permission notice (including the next
12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the
13de2362d3Smrg * Software.
14de2362d3Smrg *
15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21de2362d3Smrg * SOFTWARE.
22de2362d3Smrg *
23de2362d3Smrg * Authors: Alex Deucher <alexander.deucher@amd.com>
24de2362d3Smrg *
25de2362d3Smrg */
26de2362d3Smrg#ifdef HAVE_CONFIG_H
27de2362d3Smrg#include "config.h"
28de2362d3Smrg#endif
29de2362d3Smrg
30de2362d3Smrg#include "xf86.h"
31de2362d3Smrg
32de2362d3Smrg#include <errno.h>
33de2362d3Smrg
34de2362d3Smrg#include "radeon.h"
35de2362d3Smrg#include "evergreen_shader.h"
36de2362d3Smrg#include "radeon_reg.h"
37de2362d3Smrg#include "evergreen_reg.h"
38de2362d3Smrg#include "evergreen_state.h"
39de2362d3Smrg
40de2362d3Smrg#include "radeon_vbo.h"
41de2362d3Smrg#include "radeon_exa_shared.h"
42de2362d3Smrg
43de2362d3Smrgstatic const uint32_t EVERGREEN_ROP[16] = {
44de2362d3Smrg    RADEON_ROP3_ZERO, /* GXclear        */
45de2362d3Smrg    RADEON_ROP3_DSa,  /* Gxand          */
46de2362d3Smrg    RADEON_ROP3_SDna, /* GXandReverse   */
47de2362d3Smrg    RADEON_ROP3_S,    /* GXcopy         */
48de2362d3Smrg    RADEON_ROP3_DSna, /* GXandInverted  */
49de2362d3Smrg    RADEON_ROP3_D,    /* GXnoop         */
50de2362d3Smrg    RADEON_ROP3_DSx,  /* GXxor          */
51de2362d3Smrg    RADEON_ROP3_DSo,  /* GXor           */
52de2362d3Smrg    RADEON_ROP3_DSon, /* GXnor          */
53de2362d3Smrg    RADEON_ROP3_DSxn, /* GXequiv        */
54de2362d3Smrg    RADEON_ROP3_Dn,   /* GXinvert       */
55de2362d3Smrg    RADEON_ROP3_SDno, /* GXorReverse    */
56de2362d3Smrg    RADEON_ROP3_Sn,   /* GXcopyInverted */
57de2362d3Smrg    RADEON_ROP3_DSno, /* GXorInverted   */
58de2362d3Smrg    RADEON_ROP3_DSan, /* GXnand         */
59de2362d3Smrg    RADEON_ROP3_ONE,  /* GXset          */
60de2362d3Smrg};
61de2362d3Smrg
62de2362d3Smrgvoid
63de2362d3Smrgevergreen_start_3d(ScrnInfoPtr pScrn)
64de2362d3Smrg{
65de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
66de2362d3Smrg
67de2362d3Smrg    BEGIN_BATCH(3);
68de2362d3Smrg    PACK3(IT_CONTEXT_CONTROL, 2);
69de2362d3Smrg    E32(0x80000000);
70de2362d3Smrg    E32(0x80000000);
71de2362d3Smrg    END_BATCH();
72de2362d3Smrg
73de2362d3Smrg}
74de2362d3Smrg
/*
 * Translate a tile-split value (64, 128, ... 4096) into its register code
 * (0 ... 6).  Any value not in the table yields 4, the same code as 1024.
 */
unsigned eg_tile_split(unsigned tile_split)
{
	static const unsigned known_splits[] = {
		64, 128, 256, 512, 1024, 2048, 4096
	};
	unsigned code;

	for (code = 0; code < sizeof(known_splits) / sizeof(known_splits[0]); code++) {
		if (known_splits[code] == tile_split)
			return code;
	}

	/* unrecognised input falls back to the code used for 1024 */
	return 4;
}
89de2362d3Smrg
/*
 * Translate a macro-tile aspect (1, 2, 4 or 8) into its register code
 * (0 ... 3).  Anything else is treated like 1 and yields 0.
 */
static unsigned eg_macro_tile_aspect(unsigned macro_tile_aspect)
{
	if (macro_tile_aspect == 2)
		return 1;
	if (macro_tile_aspect == 4)
		return 2;
	if (macro_tile_aspect == 8)
		return 3;

	/* 1 and every unrecognised value */
	return 0;
}
101de2362d3Smrg
/*
 * Translate a bank width/height (1, 2, 4 or 8) into its register code
 * (0 ... 3).  Anything else is treated like 1 and yields 0.
 */
static unsigned eg_bank_wh(unsigned bankwh)
{
	unsigned code;

	/* 8 -> 3, 4 -> 2, 2 -> 1 */
	for (code = 3; code > 0; code--) {
		if (bankwh == (1u << code))
			return code;
	}

	/* 1 and every unrecognised value */
	return 0;
}
113de2362d3Smrg
/*
 * Translate a bank count (2, 4, 8 or 16) into its register code (0 ... 3).
 * Anything else is treated like 2 and yields 0.
 */
static unsigned eg_nbanks(unsigned nbanks)
{
	static const unsigned bank_counts[] = { 2, 4, 8, 16 };
	unsigned code = sizeof(bank_counts) / sizeof(bank_counts[0]);

	while (code-- > 0) {
		if (bank_counts[code] == nbanks)
			return code;
	}

	/* unrecognised value: same code as 2 */
	return 0;
}
125de2362d3Smrg
126de2362d3Smrg/*
127de2362d3Smrg * Setup of functional groups
128de2362d3Smrg */
129de2362d3Smrg
130de2362d3Smrg// asic stack/thread/gpr limits - need to query the drm
131de2362d3Smrgstatic void
132de2362d3Smrgevergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
133de2362d3Smrg{
134de2362d3Smrg    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
135de2362d3Smrg    uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
136de2362d3Smrg    uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
137de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
138de2362d3Smrg
139de2362d3Smrg    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
140de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_PALM) ||
141de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_SUMO) ||
142de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_SUMO2) ||
143de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_CAICOS))
144de2362d3Smrg	sq_config = 0;
145de2362d3Smrg    else
146de2362d3Smrg	sq_config = VC_ENABLE_bit;
147de2362d3Smrg
148de2362d3Smrg    sq_config |= (EXPORT_SRC_C_bit |
149de2362d3Smrg		  (sq_conf->cs_prio << CS_PRIO_shift) |
150de2362d3Smrg		  (sq_conf->ls_prio << LS_PRIO_shift) |
151de2362d3Smrg		  (sq_conf->hs_prio << HS_PRIO_shift) |
152de2362d3Smrg		  (sq_conf->ps_prio << PS_PRIO_shift) |
153de2362d3Smrg		  (sq_conf->vs_prio << VS_PRIO_shift) |
154de2362d3Smrg		  (sq_conf->gs_prio << GS_PRIO_shift) |
155de2362d3Smrg		  (sq_conf->es_prio << ES_PRIO_shift));
156de2362d3Smrg
157de2362d3Smrg    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
158de2362d3Smrg			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
159de2362d3Smrg			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
160de2362d3Smrg    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
161de2362d3Smrg			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
162de2362d3Smrg    sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
163de2362d3Smrg			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
164de2362d3Smrg
165de2362d3Smrg    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
166de2362d3Smrg			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
167de2362d3Smrg			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
168de2362d3Smrg			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
169de2362d3Smrg    sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
170de2362d3Smrg				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
171de2362d3Smrg
172de2362d3Smrg    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
173de2362d3Smrg				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
174de2362d3Smrg
175de2362d3Smrg    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
176de2362d3Smrg				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
177de2362d3Smrg
178de2362d3Smrg    sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
179de2362d3Smrg				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
180de2362d3Smrg
181de2362d3Smrg    BEGIN_BATCH(16);
182de2362d3Smrg    /* disable dyn gprs */
183de2362d3Smrg    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
184de2362d3Smrg    PACK0(SQ_CONFIG, 4);
185de2362d3Smrg    E32(sq_config);
186de2362d3Smrg    E32(sq_gpr_resource_mgmt_1);
187de2362d3Smrg    E32(sq_gpr_resource_mgmt_2);
188de2362d3Smrg    E32(sq_gpr_resource_mgmt_3);
189de2362d3Smrg    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
190de2362d3Smrg    E32(sq_thread_resource_mgmt);
191de2362d3Smrg    E32(sq_thread_resource_mgmt_2);
192de2362d3Smrg    E32(sq_stack_resource_mgmt_1);
193de2362d3Smrg    E32(sq_stack_resource_mgmt_2);
194de2362d3Smrg    E32(sq_stack_resource_mgmt_3);
195de2362d3Smrg    END_BATCH();
196de2362d3Smrg}
197de2362d3Smrg
198de2362d3Smrg/* cayman has some minor differences in CB_COLOR*_INFO and _ATTRIB, but none that
199de2362d3Smrg * we use here.
200de2362d3Smrg */
201de2362d3Smrgvoid
202de2362d3Smrgevergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
203de2362d3Smrg{
204de2362d3Smrg    uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim;
205de2362d3Smrg    unsigned pitch, slice, w, h, array_mode, nbanks;
206de2362d3Smrg    uint32_t tile_split, macro_aspect, bankw, bankh;
207de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
208de2362d3Smrg
209de2362d3Smrg    if (cb_conf->surface) {
210de2362d3Smrg	switch (cb_conf->surface->level[0].mode) {
211de2362d3Smrg	case RADEON_SURF_MODE_1D:
212de2362d3Smrg		array_mode = 2;
213de2362d3Smrg		break;
214de2362d3Smrg	case RADEON_SURF_MODE_2D:
215de2362d3Smrg		array_mode = 4;
216de2362d3Smrg		break;
217de2362d3Smrg	default:
218de2362d3Smrg		array_mode = 0;
219de2362d3Smrg		break;
220de2362d3Smrg	}
221de2362d3Smrg	w = cb_conf->surface->level[0].npix_x;
222de2362d3Smrg	h = cb_conf->surface->level[0].npix_y;
223de2362d3Smrg	pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
224de2362d3Smrg	slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
225de2362d3Smrg	tile_split = cb_conf->surface->tile_split;
226de2362d3Smrg	macro_aspect = cb_conf->surface->mtilea;
227de2362d3Smrg	bankw = cb_conf->surface->bankw;
228de2362d3Smrg	bankh = cb_conf->surface->bankh;
229de2362d3Smrg	tile_split = eg_tile_split(tile_split);
230de2362d3Smrg	macro_aspect = eg_macro_tile_aspect(macro_aspect);
231de2362d3Smrg	bankw = eg_bank_wh(bankw);
232de2362d3Smrg	bankh = eg_bank_wh(bankh);
23318781e08Smrg    } else {
234de2362d3Smrg	pitch = (cb_conf->w / 8) - 1;
235de2362d3Smrg	h = RADEON_ALIGN(cb_conf->h, 8);
236de2362d3Smrg	slice = ((cb_conf->w * h) / 64) - 1;
237de2362d3Smrg	array_mode = cb_conf->array_mode;
238de2362d3Smrg	w = cb_conf->w;
239de2362d3Smrg	tile_split = 4;
240de2362d3Smrg	macro_aspect = 0;
241de2362d3Smrg	bankw = 0;
242de2362d3Smrg	bankh = 0;
243de2362d3Smrg    }
244de2362d3Smrg    nbanks = info->num_banks;
245de2362d3Smrg    nbanks = eg_nbanks(nbanks);
246de2362d3Smrg
247de2362d3Smrg    cb_color_attrib |= (tile_split << CB_COLOR0_ATTRIB__TILE_SPLIT_shift)|
248de2362d3Smrg		       (nbanks << CB_COLOR0_ATTRIB__NUM_BANKS_shift) |
249de2362d3Smrg		       (bankw << CB_COLOR0_ATTRIB__BANK_WIDTH_shift) |
250de2362d3Smrg		       (bankh << CB_COLOR0_ATTRIB__BANK_HEIGHT_shift) |
251de2362d3Smrg		       (macro_aspect << CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift);
252de2362d3Smrg    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
253de2362d3Smrg		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
254de2362d3Smrg		     (array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
255de2362d3Smrg		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
256de2362d3Smrg		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
257de2362d3Smrg		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
258de2362d3Smrg		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
259de2362d3Smrg    if (cb_conf->blend_clamp)
260de2362d3Smrg	cb_color_info |= BLEND_CLAMP_bit;
261de2362d3Smrg    if (cb_conf->fast_clear)
262de2362d3Smrg	cb_color_info |= FAST_CLEAR_bit;
263de2362d3Smrg    if (cb_conf->compression)
264de2362d3Smrg	cb_color_info |= COMPRESSION_bit;
265de2362d3Smrg    if (cb_conf->blend_bypass)
266de2362d3Smrg	cb_color_info |= BLEND_BYPASS_bit;
267de2362d3Smrg    if (cb_conf->simple_float)
268de2362d3Smrg	cb_color_info |= SIMPLE_FLOAT_bit;
269de2362d3Smrg    if (cb_conf->round_mode)
270de2362d3Smrg	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
271de2362d3Smrg    if (cb_conf->tile_compact)
272de2362d3Smrg	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
273de2362d3Smrg    if (cb_conf->rat)
274de2362d3Smrg	cb_color_info |= RAT_bit;
275de2362d3Smrg
276de2362d3Smrg    /* bit 4 needs to be set for linear and depth/stencil surfaces */
277de2362d3Smrg    if (cb_conf->non_disp_tiling)
278de2362d3Smrg	cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
279de2362d3Smrg
280de2362d3Smrg    switch (cb_conf->resource_type) {
281de2362d3Smrg    case BUFFER:
282de2362d3Smrg	/* number of elements in the surface */
283de2362d3Smrg	cb_color_dim = pitch * slice;
284de2362d3Smrg	break;
285de2362d3Smrg    default:
286de2362d3Smrg	/* w/h of the surface */
287de2362d3Smrg	cb_color_dim = (((w - 1) << WIDTH_MAX_shift) |
288de2362d3Smrg			((cb_conf->h - 1) << HEIGHT_MAX_shift));
289de2362d3Smrg	break;
290de2362d3Smrg    }
291de2362d3Smrg
292de2362d3Smrg    BEGIN_BATCH(3 + 2);
293de2362d3Smrg    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
294de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
295de2362d3Smrg    END_BATCH();
296de2362d3Smrg
297de2362d3Smrg    /* Set CMASK & FMASK buffer to the offset of color buffer as
298de2362d3Smrg     * we don't use those this shouldn't cause any issue and we
299de2362d3Smrg     * then have a valid cmd stream
300de2362d3Smrg     */
301de2362d3Smrg    BEGIN_BATCH(3 + 2);
302de2362d3Smrg    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
303de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
304de2362d3Smrg    END_BATCH();
305de2362d3Smrg    BEGIN_BATCH(3 + 2);
306de2362d3Smrg    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
307de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
308de2362d3Smrg    END_BATCH();
309de2362d3Smrg
310de2362d3Smrg    /* tiling config */
311de2362d3Smrg    BEGIN_BATCH(3 + 2);
312de2362d3Smrg    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
313de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
314de2362d3Smrg    END_BATCH();
315de2362d3Smrg    BEGIN_BATCH(3 + 2);
316de2362d3Smrg    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
317de2362d3Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
318de2362d3Smrg    END_BATCH();
319de2362d3Smrg
320de2362d3Smrg    BEGIN_BATCH(33);
321de2362d3Smrg    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
322de2362d3Smrg    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
323de2362d3Smrg    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
324de2362d3Smrg    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
325de2362d3Smrg    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
326de2362d3Smrg    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
327de2362d3Smrg    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
328de2362d3Smrg    E32(0);
329de2362d3Smrg    E32(0);
330de2362d3Smrg    E32(0);
331de2362d3Smrg    E32(0);
332de2362d3Smrg    EREG(CB_TARGET_MASK,                      (cb_conf->pmask << TARGET0_ENABLE_shift));
333de2362d3Smrg    EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[cb_conf->rop] |
334de2362d3Smrg					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
335de2362d3Smrg    EREG(CB_BLEND0_CONTROL,                   cb_conf->blendcntl);
336de2362d3Smrg    END_BATCH();
33718781e08Smrg}
33818781e08Smrg
33918781e08Smrgvoid evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color)
34018781e08Smrg{
34118781e08Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
342de2362d3Smrg
34318781e08Smrg    BEGIN_BATCH(2 + 4);
34418781e08Smrg    PACK0(CB_BLEND_RED, 4);
34518781e08Smrg    EFLOAT(color[0]); /* R */
34618781e08Smrg    EFLOAT(color[1]); /* G */
34718781e08Smrg    EFLOAT(color[2]); /* B */
34818781e08Smrg    EFLOAT(color[3]); /* A */
34918781e08Smrg    END_BATCH();
350de2362d3Smrg}
351de2362d3Smrg
352de2362d3Smrgstatic void
353de2362d3Smrgevergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
354de2362d3Smrg			      uint32_t size, uint64_t mc_addr,
355de2362d3Smrg			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
356de2362d3Smrg{
357de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
358de2362d3Smrg    uint32_t cp_coher_size;
359de2362d3Smrg    if (size == 0xffffffff)
360de2362d3Smrg	cp_coher_size = 0xffffffff;
361de2362d3Smrg    else
362de2362d3Smrg	cp_coher_size = ((size + 255) >> 8);
363de2362d3Smrg
364de2362d3Smrg    BEGIN_BATCH(5 + 2);
365de2362d3Smrg    PACK3(IT_SURFACE_SYNC, 4);
366de2362d3Smrg    E32(sync_type);
367de2362d3Smrg    E32(cp_coher_size);
368de2362d3Smrg    E32((mc_addr >> 8));
369de2362d3Smrg    E32(10); /* poll interval */
370de2362d3Smrg    RELOC_BATCH(bo, rdomains, wdomain);
371de2362d3Smrg    END_BATCH();
372de2362d3Smrg}
373de2362d3Smrg
374de2362d3Smrg/* inserts a wait for vline in the command stream */
375de2362d3Smrgvoid evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
376de2362d3Smrg				  xf86CrtcPtr crtc, int start, int stop)
377de2362d3Smrg{
378de2362d3Smrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
379de2362d3Smrg    drmmode_crtc_private_ptr drmmode_crtc;
380de2362d3Smrg
381de2362d3Smrg    if (!crtc)
382de2362d3Smrg        return;
383de2362d3Smrg
384de2362d3Smrg    drmmode_crtc = crtc->driver_private;
385de2362d3Smrg
386de2362d3Smrg    if (!crtc->enabled)
387de2362d3Smrg        return;
388de2362d3Smrg
38918781e08Smrg    if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
39018781e08Smrg        return;
391de2362d3Smrg
392de2362d3Smrg    start = max(start, crtc->y);
393de2362d3Smrg    stop = min(stop, crtc->y + crtc->mode.VDisplay);
394de2362d3Smrg
395de2362d3Smrg    if (start >= stop)
396de2362d3Smrg        return;
397de2362d3Smrg
398de2362d3Smrg    BEGIN_BATCH(11);
399de2362d3Smrg    /* set the VLINE range */
400de2362d3Smrg    EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
401de2362d3Smrg	 (start << EVERGREEN_VLINE_START_SHIFT) |
402de2362d3Smrg	 (stop << EVERGREEN_VLINE_END_SHIFT));
403de2362d3Smrg
404de2362d3Smrg    /* tell the CP to poll the VLINE state register */
405de2362d3Smrg    PACK3(IT_WAIT_REG_MEM, 6);
406de2362d3Smrg    E32(IT_WAIT_REG | IT_WAIT_EQ);
407de2362d3Smrg    E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
408de2362d3Smrg    E32(0);
409de2362d3Smrg    E32(0);                          // Ref value
410de2362d3Smrg    E32(EVERGREEN_VLINE_STAT);    // Mask
411de2362d3Smrg    E32(10);                         // Wait interval
412de2362d3Smrg    /* add crtc reloc */
413de2362d3Smrg    PACK3(IT_NOP, 1);
414de2362d3Smrg    E32(drmmode_crtc->mode_crtc->crtc_id);
415de2362d3Smrg    END_BATCH();
416de2362d3Smrg}
417de2362d3Smrg
418de2362d3Smrgvoid
419de2362d3Smrgevergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
420de2362d3Smrg{
421de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
422de2362d3Smrg
423de2362d3Smrg    BEGIN_BATCH(8);
424de2362d3Smrg    /* Interpolator setup */
425de2362d3Smrg    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
426de2362d3Smrg    PACK0(SPI_PS_IN_CONTROL_0, 3);
427de2362d3Smrg    E32(((num_interp << NUM_INTERP_shift) |
428de2362d3Smrg	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
429de2362d3Smrg    E32(0); // SPI_PS_IN_CONTROL_1
430de2362d3Smrg    E32(0); // SPI_INTERP_CONTROL_0
431de2362d3Smrg    END_BATCH();
432de2362d3Smrg}
433de2362d3Smrg
434de2362d3Smrgvoid
435de2362d3Smrgevergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
436de2362d3Smrg{
437de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
438de2362d3Smrg    uint32_t sq_pgm_resources;
439de2362d3Smrg
440de2362d3Smrg    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
441de2362d3Smrg			(fs_conf->stack_size << STACK_SIZE_shift));
442de2362d3Smrg
443de2362d3Smrg    if (fs_conf->dx10_clamp)
444de2362d3Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
445de2362d3Smrg
446de2362d3Smrg    BEGIN_BATCH(3 + 2);
447de2362d3Smrg    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
448de2362d3Smrg    RELOC_BATCH(fs_conf->bo, domain, 0);
449de2362d3Smrg    END_BATCH();
450de2362d3Smrg
451de2362d3Smrg    BEGIN_BATCH(3);
452de2362d3Smrg    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
453de2362d3Smrg    END_BATCH();
454de2362d3Smrg}
455de2362d3Smrg
456de2362d3Smrg/* cayman has some minor differences in SQ_PGM_RESOUCES_VS and _RESOURCES_2_VS,
457de2362d3Smrg * but none that we use here.
458de2362d3Smrg */
459de2362d3Smrgvoid
460de2362d3Smrgevergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
461de2362d3Smrg{
462de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
463de2362d3Smrg    uint32_t sq_pgm_resources, sq_pgm_resources_2;
464de2362d3Smrg
465de2362d3Smrg    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
466de2362d3Smrg			(vs_conf->stack_size << STACK_SIZE_shift));
467de2362d3Smrg
468de2362d3Smrg    if (vs_conf->dx10_clamp)
469de2362d3Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
470de2362d3Smrg    if (vs_conf->uncached_first_inst)
471de2362d3Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
472de2362d3Smrg
473de2362d3Smrg    sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
474de2362d3Smrg			  (vs_conf->double_round << DOUBLE_ROUND_shift));
475de2362d3Smrg
476de2362d3Smrg    if (vs_conf->allow_sdi)
477de2362d3Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
478de2362d3Smrg    if (vs_conf->allow_sd0)
479de2362d3Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
480de2362d3Smrg    if (vs_conf->allow_ddi)
481de2362d3Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
482de2362d3Smrg    if (vs_conf->allow_ddo)
483de2362d3Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
484de2362d3Smrg
485de2362d3Smrg    /* flush SQ cache */
486de2362d3Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
487de2362d3Smrg				  vs_conf->shader_size, vs_conf->shader_addr,
488de2362d3Smrg				  vs_conf->bo, domain, 0);
489de2362d3Smrg
490de2362d3Smrg    BEGIN_BATCH(3 + 2);
491de2362d3Smrg    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
492de2362d3Smrg    RELOC_BATCH(vs_conf->bo, domain, 0);
493de2362d3Smrg    END_BATCH();
494de2362d3Smrg
495de2362d3Smrg    BEGIN_BATCH(4);
496de2362d3Smrg    PACK0(SQ_PGM_RESOURCES_VS, 2);
497de2362d3Smrg    E32(sq_pgm_resources);
498de2362d3Smrg    E32(sq_pgm_resources_2);
499de2362d3Smrg    END_BATCH();
500de2362d3Smrg}
501de2362d3Smrg
502de2362d3Smrg/* cayman has some minor differences in SQ_PGM_RESOUCES_PS and _RESOURCES_2_PS,
503de2362d3Smrg * but none that we use here.
504de2362d3Smrg */
505de2362d3Smrgvoid
506de2362d3Smrgevergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
507de2362d3Smrg{
508de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
509de2362d3Smrg    uint32_t sq_pgm_resources, sq_pgm_resources_2;
510de2362d3Smrg
511de2362d3Smrg    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
512de2362d3Smrg			(ps_conf->stack_size << STACK_SIZE_shift));
513de2362d3Smrg
514de2362d3Smrg    if (ps_conf->dx10_clamp)
515de2362d3Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
516de2362d3Smrg    if (ps_conf->uncached_first_inst)
517de2362d3Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
518de2362d3Smrg    if (ps_conf->clamp_consts)
519de2362d3Smrg	sq_pgm_resources |= CLAMP_CONSTS_bit;
520de2362d3Smrg
521de2362d3Smrg    sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
522de2362d3Smrg			  (ps_conf->double_round << DOUBLE_ROUND_shift));
523de2362d3Smrg
524de2362d3Smrg    if (ps_conf->allow_sdi)
525de2362d3Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
526de2362d3Smrg    if (ps_conf->allow_sd0)
527de2362d3Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
528de2362d3Smrg    if (ps_conf->allow_ddi)
529de2362d3Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
530de2362d3Smrg    if (ps_conf->allow_ddo)
531de2362d3Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
532de2362d3Smrg
533de2362d3Smrg    /* flush SQ cache */
534de2362d3Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
535de2362d3Smrg				  ps_conf->shader_size, ps_conf->shader_addr,
536de2362d3Smrg				  ps_conf->bo, domain, 0);
537de2362d3Smrg
538de2362d3Smrg    BEGIN_BATCH(3 + 2);
539de2362d3Smrg    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
540de2362d3Smrg    RELOC_BATCH(ps_conf->bo, domain, 0);
541de2362d3Smrg    END_BATCH();
542de2362d3Smrg
543de2362d3Smrg    BEGIN_BATCH(5);
544de2362d3Smrg    PACK0(SQ_PGM_RESOURCES_PS, 3);
545de2362d3Smrg    E32(sq_pgm_resources);
546de2362d3Smrg    E32(sq_pgm_resources_2);
547de2362d3Smrg    E32(ps_conf->export_mode);
548de2362d3Smrg    END_BATCH();
549de2362d3Smrg}
550de2362d3Smrg
551de2362d3Smrgvoid
552de2362d3Smrgevergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
553de2362d3Smrg{
554de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
555de2362d3Smrg    /* size reg is units of 16 consts (4 dwords each) */
556de2362d3Smrg    uint32_t size = const_conf->size_bytes >> 8;
557de2362d3Smrg
558de2362d3Smrg    if (size == 0)
559de2362d3Smrg	size = 1;
560de2362d3Smrg
561de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
562de2362d3Smrg    {
56318781e08Smrg	    uint32_t count = size << 6, *p = const_conf->cpu_ptr;
564de2362d3Smrg
565de2362d3Smrg	    while(count--) {
566de2362d3Smrg		    *p = cpu_to_le32(*p);
567de2362d3Smrg		    p++;
568de2362d3Smrg	    }
569de2362d3Smrg    }
570de2362d3Smrg#endif
571de2362d3Smrg
572de2362d3Smrg    /* flush SQ cache */
573de2362d3Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
574de2362d3Smrg				  const_conf->size_bytes, const_conf->const_addr,
575de2362d3Smrg				  const_conf->bo, domain, 0);
576de2362d3Smrg
577de2362d3Smrg    switch (const_conf->type) {
578de2362d3Smrg    case SHADER_TYPE_VS:
579de2362d3Smrg	BEGIN_BATCH(3);
580de2362d3Smrg	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
581de2362d3Smrg	END_BATCH();
582de2362d3Smrg	BEGIN_BATCH(3 + 2);
583de2362d3Smrg	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
584de2362d3Smrg	RELOC_BATCH(const_conf->bo, domain, 0);
585de2362d3Smrg	END_BATCH();
586de2362d3Smrg	break;
587de2362d3Smrg    case SHADER_TYPE_PS:
588de2362d3Smrg	BEGIN_BATCH(3);
589de2362d3Smrg	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
590de2362d3Smrg	END_BATCH();
591de2362d3Smrg	BEGIN_BATCH(3 + 2);
592de2362d3Smrg	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
593de2362d3Smrg	RELOC_BATCH(const_conf->bo, domain, 0);
594de2362d3Smrg	END_BATCH();
595de2362d3Smrg	break;
596de2362d3Smrg    default:
597de2362d3Smrg	ErrorF("Unsupported const type %d\n", const_conf->type);
598de2362d3Smrg	break;
599de2362d3Smrg    }
600de2362d3Smrg
601de2362d3Smrg}
602de2362d3Smrg
603de2362d3Smrgvoid
604de2362d3Smrgevergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
605de2362d3Smrg{
606de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
607de2362d3Smrg    /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
608de2362d3Smrg     * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
609de2362d3Smrg     */
610de2362d3Smrg    BEGIN_BATCH(3);
611de2362d3Smrg    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
612de2362d3Smrg    END_BATCH();
613de2362d3Smrg}
614de2362d3Smrg
615de2362d3Smrg/* cayman has some minor differences in SQ_VTX_CONSTANT_WORD2_0 and _WORD3_0,
616de2362d3Smrg * but none that we use here.
617de2362d3Smrg */
618de2362d3Smrgstatic void
619de2362d3Smrgevergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
620de2362d3Smrg{
621de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
622de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
623de2362d3Smrg    uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
624de2362d3Smrg
625de2362d3Smrg    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
626de2362d3Smrg			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
627de2362d3Smrg			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
628de2362d3Smrg			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
629de2362d3Smrg			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
630de2362d3Smrg    if (res->clamp_x)
631de2362d3Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
632de2362d3Smrg
633de2362d3Smrg    if (res->format_comp_all)
634de2362d3Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
635de2362d3Smrg
636de2362d3Smrg    if (res->srf_mode_all)
637de2362d3Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
638de2362d3Smrg
639de2362d3Smrg    sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
640de2362d3Smrg			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
641de2362d3Smrg			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
642de2362d3Smrg			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
643de2362d3Smrg
644de2362d3Smrg    if (res->uncached)
645de2362d3Smrg	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
646de2362d3Smrg
647de2362d3Smrg    /* XXX ??? */
648de2362d3Smrg    sq_vtx_constant_word4 = 0;
649de2362d3Smrg
650de2362d3Smrg    /* flush vertex cache */
651de2362d3Smrg    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
652de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_PALM) ||
653de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_SUMO) ||
654de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_SUMO2) ||
655de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_CAICOS) ||
656de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_CAYMAN) ||
657de2362d3Smrg	(info->ChipFamily == CHIP_FAMILY_ARUBA))
658de2362d3Smrg	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
65918781e08Smrg				      accel_state->vbo.vb_offset, 0,
660de2362d3Smrg				      res->bo,
661de2362d3Smrg				      domain, 0);
662de2362d3Smrg    else
663de2362d3Smrg	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
66418781e08Smrg				      accel_state->vbo.vb_offset, 0,
665de2362d3Smrg				      res->bo,
666de2362d3Smrg				      domain, 0);
667de2362d3Smrg
668de2362d3Smrg    BEGIN_BATCH(10 + 2);
669de2362d3Smrg    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
670de2362d3Smrg    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
671de2362d3Smrg    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
672de2362d3Smrg    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
673de2362d3Smrg    E32(sq_vtx_constant_word3);		// 3: swizzles
674de2362d3Smrg    E32(sq_vtx_constant_word4);		// 4: num elements
675de2362d3Smrg    E32(0);							// 5: n/a
676de2362d3Smrg    E32(0);							// 6: n/a
677de2362d3Smrg    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
678de2362d3Smrg    RELOC_BATCH(res->bo, domain, 0);
679de2362d3Smrg    END_BATCH();
680de2362d3Smrg}
681de2362d3Smrg
682de2362d3Smrg/* cayman has some minor differences in SQ_TEX_CONSTANT_WORD0_0 and _WORD4_0,
683de2362d3Smrg * but none that we use here.
684de2362d3Smrg */
685de2362d3Smrgvoid
686de2362d3Smrgevergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
687de2362d3Smrg{
688de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
689de2362d3Smrg    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
690de2362d3Smrg    uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
691de2362d3Smrg    uint32_t array_mode, pitch, tile_split, macro_aspect, bankw, bankh, nbanks;
692de2362d3Smrg
693de2362d3Smrg    if (tex_res->surface) {
694de2362d3Smrg	switch (tex_res->surface->level[0].mode) {
695de2362d3Smrg	case RADEON_SURF_MODE_1D:
696de2362d3Smrg		array_mode = 2;
697de2362d3Smrg		break;
698de2362d3Smrg	case RADEON_SURF_MODE_2D:
699de2362d3Smrg		array_mode = 4;
700de2362d3Smrg		break;
701de2362d3Smrg	default:
702de2362d3Smrg		array_mode = 0;
703de2362d3Smrg		break;
704de2362d3Smrg	}
705de2362d3Smrg	pitch = tex_res->surface->level[0].nblk_x >> 3;
706de2362d3Smrg	tile_split = tex_res->surface->tile_split;
707de2362d3Smrg	macro_aspect = tex_res->surface->mtilea;
708de2362d3Smrg	bankw = tex_res->surface->bankw;
709de2362d3Smrg	bankh = tex_res->surface->bankh;
710de2362d3Smrg	tile_split = eg_tile_split(tile_split);
711de2362d3Smrg	macro_aspect = eg_macro_tile_aspect(macro_aspect);
712de2362d3Smrg	bankw = eg_bank_wh(bankw);
713de2362d3Smrg	bankh = eg_bank_wh(bankh);
71418781e08Smrg    } else {
715de2362d3Smrg	array_mode = tex_res->array_mode;
716de2362d3Smrg	pitch = (tex_res->pitch + 7) >> 3;
717de2362d3Smrg	tile_split = 4;
718de2362d3Smrg	macro_aspect = 0;
719de2362d3Smrg	bankw = 0;
720de2362d3Smrg	bankh = 0;
721de2362d3Smrg    }
722de2362d3Smrg    nbanks = info->num_banks;
723de2362d3Smrg    nbanks = eg_nbanks(nbanks);
724de2362d3Smrg
725de2362d3Smrg    sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
726de2362d3Smrg
727de2362d3Smrg    if (tex_res->w)
728de2362d3Smrg	sq_tex_resource_word0 |= ( ((pitch - 1) << PITCH_shift) |
729de2362d3Smrg				   ((tex_res->w - 1) << TEX_WIDTH_shift) );
730de2362d3Smrg
731de2362d3Smrg    if (tex_res->tile_type)
732de2362d3Smrg	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
733de2362d3Smrg
734de2362d3Smrg    sq_tex_resource_word1 = (array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
735de2362d3Smrg
736de2362d3Smrg    if (tex_res->h)
737de2362d3Smrg	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
738de2362d3Smrg    if (tex_res->depth)
739de2362d3Smrg	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
740de2362d3Smrg
741de2362d3Smrg    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
742de2362d3Smrg			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
743de2362d3Smrg			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
744de2362d3Smrg			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
745de2362d3Smrg			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
746de2362d3Smrg			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
747de2362d3Smrg			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
748de2362d3Smrg			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
749de2362d3Smrg			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
750de2362d3Smrg			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
751de2362d3Smrg			     (tex_res->base_level << BASE_LEVEL_shift));
752de2362d3Smrg
753de2362d3Smrg    if (tex_res->srf_mode_all)
754de2362d3Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
755de2362d3Smrg    if (tex_res->force_degamma)
756de2362d3Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
757de2362d3Smrg
758de2362d3Smrg    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
759de2362d3Smrg			     (tex_res->base_array << BASE_ARRAY_shift) |
760de2362d3Smrg			     (tex_res->last_array << LAST_ARRAY_shift));
761de2362d3Smrg
762de2362d3Smrg    sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
763de2362d3Smrg			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
764de2362d3Smrg			     (tile_split << SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift));
765de2362d3Smrg
766de2362d3Smrg    if (tex_res->interlaced)
767de2362d3Smrg	sq_tex_resource_word6 |= INTERLACED_bit;
768de2362d3Smrg
769de2362d3Smrg    sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
770de2362d3Smrg			     (macro_aspect << SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift) |
771de2362d3Smrg			     (nbanks << SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift) |
772de2362d3Smrg			     (bankw << SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift) |
773de2362d3Smrg			     (bankh << SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift) |
774de2362d3Smrg			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
775de2362d3Smrg
776de2362d3Smrg    /* flush texture cache */
777de2362d3Smrg    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
778de2362d3Smrg				  tex_res->size, tex_res->base,
779de2362d3Smrg				  tex_res->bo, domain, 0);
780de2362d3Smrg
781de2362d3Smrg    BEGIN_BATCH(10 + 4);
782de2362d3Smrg    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
783de2362d3Smrg    E32(sq_tex_resource_word0);
784de2362d3Smrg    E32(sq_tex_resource_word1);
785de2362d3Smrg    E32(((tex_res->base) >> 8));
786de2362d3Smrg    E32(((tex_res->mip_base) >> 8));
787de2362d3Smrg    E32(sq_tex_resource_word4);
788de2362d3Smrg    E32(sq_tex_resource_word5);
789de2362d3Smrg    E32(sq_tex_resource_word6);
790de2362d3Smrg    E32(sq_tex_resource_word7);
791de2362d3Smrg    RELOC_BATCH(tex_res->bo, domain, 0);
792de2362d3Smrg    RELOC_BATCH(tex_res->mip_bo, domain, 0);
793de2362d3Smrg    END_BATCH();
794de2362d3Smrg}
795de2362d3Smrg
796de2362d3Smrg/* cayman has some minor differences in SQ_TEX_SAMPLER_WORD0_0,
797de2362d3Smrg * but none that we use here.
798de2362d3Smrg */
799de2362d3Smrgvoid
800de2362d3Smrgevergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
801de2362d3Smrg{
802de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
803de2362d3Smrg    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
804de2362d3Smrg
805de2362d3Smrg    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
806de2362d3Smrg			    (s->clamp_y       << CLAMP_Y_shift)					|
807de2362d3Smrg			    (s->clamp_z       << CLAMP_Z_shift)					|
808de2362d3Smrg			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
809de2362d3Smrg			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
810de2362d3Smrg			    (s->z_filter      << Z_FILTER_shift)	|
811de2362d3Smrg			    (s->mip_filter    << MIP_FILTER_shift)				|
812de2362d3Smrg			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
813de2362d3Smrg			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
814de2362d3Smrg			    (s->chroma_key    << CHROMA_KEY_shift));
815de2362d3Smrg
816de2362d3Smrg    sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
817de2362d3Smrg			    (s->max_lod       << MAX_LOD_shift)					|
818de2362d3Smrg			    (s->perf_mip      << PERF_MIP_shift)	|
819de2362d3Smrg			    (s->perf_z        << PERF_Z_shift));
820de2362d3Smrg
821de2362d3Smrg
822de2362d3Smrg    sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
823de2362d3Smrg			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
824de2362d3Smrg
825de2362d3Smrg    if (s->mc_coord_truncate)
826de2362d3Smrg	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
827de2362d3Smrg    if (s->force_degamma)
828de2362d3Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
829de2362d3Smrg    if (s->truncate_coord)
830de2362d3Smrg	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
831de2362d3Smrg    if (s->disable_cube_wrap)
832de2362d3Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
833de2362d3Smrg    if (s->type)
834de2362d3Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
835de2362d3Smrg
836de2362d3Smrg    BEGIN_BATCH(5);
837de2362d3Smrg    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
838de2362d3Smrg    E32(sq_tex_sampler_word0);
839de2362d3Smrg    E32(sq_tex_sampler_word1);
840de2362d3Smrg    E32(sq_tex_sampler_word2);
841de2362d3Smrg    END_BATCH();
842de2362d3Smrg}
843de2362d3Smrg
844de2362d3Smrg/* workarounds for hw bugs in eg+ */
845de2362d3Smrg/* only affects screen/window/generic/vport.  cliprects are not affected */
846de2362d3Smrgstatic void
847de2362d3Smrgevergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn, int *x1, int *y1, int *x2, int *y2)
848de2362d3Smrg{
849de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
850de2362d3Smrg
851de2362d3Smrg    /* all eg+ asics */
852de2362d3Smrg    if (*x2 == 0)
853de2362d3Smrg	*x1 = 1;
854de2362d3Smrg    if (*y2 == 0)
855de2362d3Smrg	*y1 = 1;
856de2362d3Smrg
857de2362d3Smrg    /* cayman/tn only */
858de2362d3Smrg    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
859de2362d3Smrg	/* cliprects aren't affected so we can use them to clip if we need
860de2362d3Smrg	 * a true 1x1 clip region
861de2362d3Smrg	 */
862de2362d3Smrg	if ((*x2 == 1) && (*y2 == 1))
863de2362d3Smrg	    *x2 = 2;
864de2362d3Smrg    }
865de2362d3Smrg}
866de2362d3Smrg
867de2362d3Smrg//XXX deal with clip offsets in clip setup
868de2362d3Smrgvoid
869de2362d3Smrgevergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
870de2362d3Smrg{
871de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
872de2362d3Smrg
873de2362d3Smrg    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
874de2362d3Smrg
875de2362d3Smrg    BEGIN_BATCH(4);
876de2362d3Smrg    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
877de2362d3Smrg    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
878de2362d3Smrg	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
879de2362d3Smrg    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
880de2362d3Smrg	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
881de2362d3Smrg    END_BATCH();
882de2362d3Smrg}
883de2362d3Smrg
884de2362d3Smrgvoid
885de2362d3Smrgevergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
886de2362d3Smrg{
887de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
888de2362d3Smrg
889de2362d3Smrg    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
890de2362d3Smrg
891de2362d3Smrg    BEGIN_BATCH(4);
892de2362d3Smrg    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
893de2362d3Smrg    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
894de2362d3Smrg	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
895de2362d3Smrg	 WINDOW_OFFSET_DISABLE_bit));
896de2362d3Smrg    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
897de2362d3Smrg	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
898de2362d3Smrg    END_BATCH();
899de2362d3Smrg}
900de2362d3Smrg
901de2362d3Smrgvoid
902de2362d3Smrgevergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
903de2362d3Smrg{
904de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
905de2362d3Smrg
906de2362d3Smrg    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
907de2362d3Smrg
908de2362d3Smrg    BEGIN_BATCH(4);
909de2362d3Smrg    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
910de2362d3Smrg    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
911de2362d3Smrg	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
912de2362d3Smrg	 WINDOW_OFFSET_DISABLE_bit));
913de2362d3Smrg    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
914de2362d3Smrg	 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
915de2362d3Smrg    END_BATCH();
916de2362d3Smrg}
917de2362d3Smrg
918de2362d3Smrgvoid
919de2362d3Smrgevergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
920de2362d3Smrg{
921de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
922de2362d3Smrg
923de2362d3Smrg    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
924de2362d3Smrg
925de2362d3Smrg    BEGIN_BATCH(4);
926de2362d3Smrg    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
927de2362d3Smrg    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
928de2362d3Smrg	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
929de2362d3Smrg	 WINDOW_OFFSET_DISABLE_bit));
930de2362d3Smrg    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
931de2362d3Smrg	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
932de2362d3Smrg    END_BATCH();
933de2362d3Smrg}
934de2362d3Smrg
935de2362d3Smrgvoid
936de2362d3Smrgevergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
937de2362d3Smrg{
938de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
939de2362d3Smrg
940de2362d3Smrg    BEGIN_BATCH(4);
941de2362d3Smrg    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
942de2362d3Smrg    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
943de2362d3Smrg	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
944de2362d3Smrg    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
945de2362d3Smrg	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
946de2362d3Smrg    END_BATCH();
947de2362d3Smrg}
948de2362d3Smrg
949de2362d3Smrg/*
950de2362d3Smrg * Setup of default state
951de2362d3Smrg */
952de2362d3Smrg
953de2362d3Smrgvoid
954de2362d3Smrgevergreen_set_default_state(ScrnInfoPtr pScrn)
955de2362d3Smrg{
956de2362d3Smrg    tex_resource_t tex_res;
957de2362d3Smrg    shader_config_t fs_conf;
958de2362d3Smrg    sq_config_t sq_conf;
959de2362d3Smrg    int i;
960de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
961de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
962de2362d3Smrg
963de2362d3Smrg    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
964de2362d3Smrg	cayman_set_default_state(pScrn);
965de2362d3Smrg	return;
966de2362d3Smrg    }
967de2362d3Smrg
968de2362d3Smrg    if (accel_state->XInited3D)
969de2362d3Smrg	return;
970de2362d3Smrg
971de2362d3Smrg    memset(&tex_res, 0, sizeof(tex_resource_t));
972de2362d3Smrg    memset(&fs_conf, 0, sizeof(shader_config_t));
973de2362d3Smrg
974de2362d3Smrg    accel_state->XInited3D = TRUE;
975de2362d3Smrg
976de2362d3Smrg    evergreen_start_3d(pScrn);
977de2362d3Smrg
978de2362d3Smrg    /* SQ */
979de2362d3Smrg    sq_conf.ps_prio = 0;
980de2362d3Smrg    sq_conf.vs_prio = 1;
981de2362d3Smrg    sq_conf.gs_prio = 2;
982de2362d3Smrg    sq_conf.es_prio = 3;
983de2362d3Smrg    sq_conf.hs_prio = 0;
984de2362d3Smrg    sq_conf.ls_prio = 0;
985de2362d3Smrg    sq_conf.cs_prio = 0;
986de2362d3Smrg
987de2362d3Smrg    switch (info->ChipFamily) {
988de2362d3Smrg    case CHIP_FAMILY_CEDAR:
989de2362d3Smrg    default:
990de2362d3Smrg	sq_conf.num_ps_gprs = 93;
991de2362d3Smrg	sq_conf.num_vs_gprs = 46;
992de2362d3Smrg	sq_conf.num_temp_gprs = 4;
993de2362d3Smrg	sq_conf.num_gs_gprs = 31;
994de2362d3Smrg	sq_conf.num_es_gprs = 31;
995de2362d3Smrg	sq_conf.num_hs_gprs = 23;
996de2362d3Smrg	sq_conf.num_ls_gprs = 23;
997de2362d3Smrg	sq_conf.num_ps_threads = 96;
998de2362d3Smrg	sq_conf.num_vs_threads = 16;
999de2362d3Smrg	sq_conf.num_gs_threads = 16;
1000de2362d3Smrg	sq_conf.num_es_threads = 16;
1001de2362d3Smrg	sq_conf.num_hs_threads = 16;
1002de2362d3Smrg	sq_conf.num_ls_threads = 16;
1003de2362d3Smrg	sq_conf.num_ps_stack_entries = 42;
1004de2362d3Smrg	sq_conf.num_vs_stack_entries = 42;
1005de2362d3Smrg	sq_conf.num_gs_stack_entries = 42;
1006de2362d3Smrg	sq_conf.num_es_stack_entries = 42;
1007de2362d3Smrg	sq_conf.num_hs_stack_entries = 42;
1008de2362d3Smrg	sq_conf.num_ls_stack_entries = 42;
1009de2362d3Smrg	break;
1010de2362d3Smrg    case CHIP_FAMILY_REDWOOD:
1011de2362d3Smrg	sq_conf.num_ps_gprs = 93;
1012de2362d3Smrg	sq_conf.num_vs_gprs = 46;
1013de2362d3Smrg	sq_conf.num_temp_gprs = 4;
1014de2362d3Smrg	sq_conf.num_gs_gprs = 31;
1015de2362d3Smrg	sq_conf.num_es_gprs = 31;
1016de2362d3Smrg	sq_conf.num_hs_gprs = 23;
1017de2362d3Smrg	sq_conf.num_ls_gprs = 23;
1018de2362d3Smrg	sq_conf.num_ps_threads = 128;
1019de2362d3Smrg	sq_conf.num_vs_threads = 20;
1020de2362d3Smrg	sq_conf.num_gs_threads = 20;
1021de2362d3Smrg	sq_conf.num_es_threads = 20;
1022de2362d3Smrg	sq_conf.num_hs_threads = 20;
1023de2362d3Smrg	sq_conf.num_ls_threads = 20;
1024de2362d3Smrg	sq_conf.num_ps_stack_entries = 42;
1025de2362d3Smrg	sq_conf.num_vs_stack_entries = 42;
1026de2362d3Smrg	sq_conf.num_gs_stack_entries = 42;
1027de2362d3Smrg	sq_conf.num_es_stack_entries = 42;
1028de2362d3Smrg	sq_conf.num_hs_stack_entries = 42;
1029de2362d3Smrg	sq_conf.num_ls_stack_entries = 42;
1030de2362d3Smrg	break;
1031de2362d3Smrg    case CHIP_FAMILY_JUNIPER:
1032de2362d3Smrg	sq_conf.num_ps_gprs = 93;
1033de2362d3Smrg	sq_conf.num_vs_gprs = 46;
1034de2362d3Smrg	sq_conf.num_temp_gprs = 4;
1035de2362d3Smrg	sq_conf.num_gs_gprs = 31;
1036de2362d3Smrg	sq_conf.num_es_gprs = 31;
1037de2362d3Smrg	sq_conf.num_hs_gprs = 23;
1038de2362d3Smrg	sq_conf.num_ls_gprs = 23;
1039de2362d3Smrg	sq_conf.num_ps_threads = 128;
1040de2362d3Smrg	sq_conf.num_vs_threads = 20;
1041de2362d3Smrg	sq_conf.num_gs_threads = 20;
1042de2362d3Smrg	sq_conf.num_es_threads = 20;
1043de2362d3Smrg	sq_conf.num_hs_threads = 20;
1044de2362d3Smrg	sq_conf.num_ls_threads = 20;
1045de2362d3Smrg	sq_conf.num_ps_stack_entries = 85;
1046de2362d3Smrg	sq_conf.num_vs_stack_entries = 85;
1047de2362d3Smrg	sq_conf.num_gs_stack_entries = 85;
1048de2362d3Smrg	sq_conf.num_es_stack_entries = 85;
1049de2362d3Smrg	sq_conf.num_hs_stack_entries = 85;
1050de2362d3Smrg	sq_conf.num_ls_stack_entries = 85;
1051de2362d3Smrg	break;
1052de2362d3Smrg    case CHIP_FAMILY_CYPRESS:
1053de2362d3Smrg    case CHIP_FAMILY_HEMLOCK:
1054de2362d3Smrg	sq_conf.num_ps_gprs = 93;
1055de2362d3Smrg	sq_conf.num_vs_gprs = 46;
1056de2362d3Smrg	sq_conf.num_temp_gprs = 4;
1057de2362d3Smrg	sq_conf.num_gs_gprs = 31;
1058de2362d3Smrg	sq_conf.num_es_gprs = 31;
1059de2362d3Smrg	sq_conf.num_hs_gprs = 23;
1060de2362d3Smrg	sq_conf.num_ls_gprs = 23;
1061de2362d3Smrg	sq_conf.num_ps_threads = 128;
1062de2362d3Smrg	sq_conf.num_vs_threads = 20;
1063de2362d3Smrg	sq_conf.num_gs_threads = 20;
1064de2362d3Smrg	sq_conf.num_es_threads = 20;
1065de2362d3Smrg	sq_conf.num_hs_threads = 20;
1066de2362d3Smrg	sq_conf.num_ls_threads = 20;
1067de2362d3Smrg	sq_conf.num_ps_stack_entries = 85;
1068de2362d3Smrg	sq_conf.num_vs_stack_entries = 85;
1069de2362d3Smrg	sq_conf.num_gs_stack_entries = 85;
1070de2362d3Smrg	sq_conf.num_es_stack_entries = 85;
1071de2362d3Smrg	sq_conf.num_hs_stack_entries = 85;
1072de2362d3Smrg	sq_conf.num_ls_stack_entries = 85;
1073de2362d3Smrg	break;
1074de2362d3Smrg    case CHIP_FAMILY_PALM:
1075de2362d3Smrg	sq_conf.num_ps_gprs = 93;
1076de2362d3Smrg	sq_conf.num_vs_gprs = 46;
1077de2362d3Smrg	sq_conf.num_temp_gprs = 4;
1078de2362d3Smrg	sq_conf.num_gs_gprs = 31;
1079de2362d3Smrg	sq_conf.num_es_gprs = 31;
1080de2362d3Smrg	sq_conf.num_hs_gprs = 23;
1081de2362d3Smrg	sq_conf.num_ls_gprs = 23;
1082de2362d3Smrg	sq_conf.num_ps_threads = 96;
1083de2362d3Smrg	sq_conf.num_vs_threads = 16;
1084de2362d3Smrg	sq_conf.num_gs_threads = 16;
1085de2362d3Smrg	sq_conf.num_es_threads = 16;
1086de2362d3Smrg	sq_conf.num_hs_threads = 16;
1087de2362d3Smrg	sq_conf.num_ls_threads = 16;
1088de2362d3Smrg	sq_conf.num_ps_stack_entries = 42;
1089de2362d3Smrg	sq_conf.num_vs_stack_entries = 42;
1090de2362d3Smrg	sq_conf.num_gs_stack_entries = 42;
1091de2362d3Smrg	sq_conf.num_es_stack_entries = 42;
1092de2362d3Smrg	sq_conf.num_hs_stack_entries = 42;
1093de2362d3Smrg	sq_conf.num_ls_stack_entries = 42;
1094de2362d3Smrg	break;
1095de2362d3Smrg    case CHIP_FAMILY_SUMO:
1096de2362d3Smrg	sq_conf.num_ps_gprs = 93;
1097de2362d3Smrg	sq_conf.num_vs_gprs = 46;
1098de2362d3Smrg	sq_conf.num_temp_gprs = 4;
1099de2362d3Smrg	sq_conf.num_gs_gprs = 31;
1100de2362d3Smrg	sq_conf.num_es_gprs = 31;
1101de2362d3Smrg	sq_conf.num_hs_gprs = 23;
1102de2362d3Smrg	sq_conf.num_ls_gprs = 23;
1103de2362d3Smrg	sq_conf.num_ps_threads = 96;
1104de2362d3Smrg	sq_conf.num_vs_threads = 25;
1105de2362d3Smrg	sq_conf.num_gs_threads = 25;
1106de2362d3Smrg	sq_conf.num_es_threads = 25;
1107de2362d3Smrg	sq_conf.num_hs_threads = 25;
1108de2362d3Smrg	sq_conf.num_ls_threads = 25;
1109de2362d3Smrg	sq_conf.num_ps_stack_entries = 42;
1110de2362d3Smrg	sq_conf.num_vs_stack_entries = 42;
1111de2362d3Smrg	sq_conf.num_gs_stack_entries = 42;
1112de2362d3Smrg	sq_conf.num_es_stack_entries = 42;
1113de2362d3Smrg	sq_conf.num_hs_stack_entries = 42;
1114de2362d3Smrg	sq_conf.num_ls_stack_entries = 42;
1115de2362d3Smrg	break;
1116de2362d3Smrg    case CHIP_FAMILY_SUMO2:
1117de2362d3Smrg	sq_conf.num_ps_gprs = 93;
1118de2362d3Smrg	sq_conf.num_vs_gprs = 46;
1119de2362d3Smrg	sq_conf.num_temp_gprs = 4;
1120de2362d3Smrg	sq_conf.num_gs_gprs = 31;
1121de2362d3Smrg	sq_conf.num_es_gprs = 31;
1122de2362d3Smrg	sq_conf.num_hs_gprs = 23;
1123de2362d3Smrg	sq_conf.num_ls_gprs = 23;
1124de2362d3Smrg	sq_conf.num_ps_threads = 96;
1125de2362d3Smrg	sq_conf.num_vs_threads = 25;
1126de2362d3Smrg	sq_conf.num_gs_threads = 25;
1127de2362d3Smrg	sq_conf.num_es_threads = 25;
1128de2362d3Smrg	sq_conf.num_hs_threads = 25;
1129de2362d3Smrg	sq_conf.num_ls_threads = 25;
1130de2362d3Smrg	sq_conf.num_ps_stack_entries = 85;
1131de2362d3Smrg	sq_conf.num_vs_stack_entries = 85;
1132de2362d3Smrg	sq_conf.num_gs_stack_entries = 85;
1133de2362d3Smrg	sq_conf.num_es_stack_entries = 85;
1134de2362d3Smrg	sq_conf.num_hs_stack_entries = 85;
1135de2362d3Smrg	sq_conf.num_ls_stack_entries = 85;
1136de2362d3Smrg	break;
1137de2362d3Smrg    case CHIP_FAMILY_BARTS:
1138de2362d3Smrg	sq_conf.num_ps_gprs = 93;
1139de2362d3Smrg	sq_conf.num_vs_gprs = 46;
1140de2362d3Smrg	sq_conf.num_temp_gprs = 4;
1141de2362d3Smrg	sq_conf.num_gs_gprs = 31;
1142de2362d3Smrg	sq_conf.num_es_gprs = 31;
1143de2362d3Smrg	sq_conf.num_hs_gprs = 23;
1144de2362d3Smrg	sq_conf.num_ls_gprs = 23;
1145de2362d3Smrg	sq_conf.num_ps_threads = 128;
1146de2362d3Smrg	sq_conf.num_vs_threads = 20;
1147de2362d3Smrg	sq_conf.num_gs_threads = 20;
1148de2362d3Smrg	sq_conf.num_es_threads = 20;
1149de2362d3Smrg	sq_conf.num_hs_threads = 20;
1150de2362d3Smrg	sq_conf.num_ls_threads = 20;
1151de2362d3Smrg	sq_conf.num_ps_stack_entries = 85;
1152de2362d3Smrg	sq_conf.num_vs_stack_entries = 85;
1153de2362d3Smrg	sq_conf.num_gs_stack_entries = 85;
1154de2362d3Smrg	sq_conf.num_es_stack_entries = 85;
1155de2362d3Smrg	sq_conf.num_hs_stack_entries = 85;
1156de2362d3Smrg	sq_conf.num_ls_stack_entries = 85;
1157de2362d3Smrg	break;
1158de2362d3Smrg    case CHIP_FAMILY_TURKS:
1159de2362d3Smrg	sq_conf.num_ps_gprs = 93;
1160de2362d3Smrg	sq_conf.num_vs_gprs = 46;
1161de2362d3Smrg	sq_conf.num_temp_gprs = 4;
1162de2362d3Smrg	sq_conf.num_gs_gprs = 31;
1163de2362d3Smrg	sq_conf.num_es_gprs = 31;
1164de2362d3Smrg	sq_conf.num_hs_gprs = 23;
1165de2362d3Smrg	sq_conf.num_ls_gprs = 23;
1166de2362d3Smrg	sq_conf.num_ps_threads = 128;
1167de2362d3Smrg	sq_conf.num_vs_threads = 20;
1168de2362d3Smrg	sq_conf.num_gs_threads = 20;
1169de2362d3Smrg	sq_conf.num_es_threads = 20;
1170de2362d3Smrg	sq_conf.num_hs_threads = 20;
1171de2362d3Smrg	sq_conf.num_ls_threads = 20;
1172de2362d3Smrg	sq_conf.num_ps_stack_entries = 42;
1173de2362d3Smrg	sq_conf.num_vs_stack_entries = 42;
1174de2362d3Smrg	sq_conf.num_gs_stack_entries = 42;
1175de2362d3Smrg	sq_conf.num_es_stack_entries = 42;
1176de2362d3Smrg	sq_conf.num_hs_stack_entries = 42;
1177de2362d3Smrg	sq_conf.num_ls_stack_entries = 42;
1178de2362d3Smrg	break;
1179de2362d3Smrg    case CHIP_FAMILY_CAICOS:
1180de2362d3Smrg	sq_conf.num_ps_gprs = 93;
1181de2362d3Smrg	sq_conf.num_vs_gprs = 46;
1182de2362d3Smrg	sq_conf.num_temp_gprs = 4;
1183de2362d3Smrg	sq_conf.num_gs_gprs = 31;
1184de2362d3Smrg	sq_conf.num_es_gprs = 31;
1185de2362d3Smrg	sq_conf.num_hs_gprs = 23;
1186de2362d3Smrg	sq_conf.num_ls_gprs = 23;
1187de2362d3Smrg	sq_conf.num_ps_threads = 128;
1188de2362d3Smrg	sq_conf.num_vs_threads = 10;
1189de2362d3Smrg	sq_conf.num_gs_threads = 10;
1190de2362d3Smrg	sq_conf.num_es_threads = 10;
1191de2362d3Smrg	sq_conf.num_hs_threads = 10;
1192de2362d3Smrg	sq_conf.num_ls_threads = 10;
1193de2362d3Smrg	sq_conf.num_ps_stack_entries = 42;
1194de2362d3Smrg	sq_conf.num_vs_stack_entries = 42;
1195de2362d3Smrg	sq_conf.num_gs_stack_entries = 42;
1196de2362d3Smrg	sq_conf.num_es_stack_entries = 42;
1197de2362d3Smrg	sq_conf.num_hs_stack_entries = 42;
1198de2362d3Smrg	sq_conf.num_ls_stack_entries = 42;
1199de2362d3Smrg	break;
1200de2362d3Smrg    }
1201de2362d3Smrg
1202de2362d3Smrg    evergreen_sq_setup(pScrn, &sq_conf);
1203de2362d3Smrg
1204de2362d3Smrg    BEGIN_BATCH(27);
1205de2362d3Smrg    EREG(SQ_LDS_ALLOC_PS, 0);
1206de2362d3Smrg    EREG(SQ_LDS_RESOURCE_MGMT, 0x10001000);
1207de2362d3Smrg    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
1208de2362d3Smrg
1209de2362d3Smrg    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
1210de2362d3Smrg    E32(0);
1211de2362d3Smrg    E32(0);
1212de2362d3Smrg    E32(0);
1213de2362d3Smrg    E32(0);
1214de2362d3Smrg    E32(0);
1215de2362d3Smrg    E32(0);
1216de2362d3Smrg
1217de2362d3Smrg    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
1218de2362d3Smrg    E32(0);
1219de2362d3Smrg    E32(0);
1220de2362d3Smrg    E32(0);
1221de2362d3Smrg    E32(0);
1222de2362d3Smrg
1223de2362d3Smrg    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
1224de2362d3Smrg    E32(0);
1225de2362d3Smrg    E32(0);
1226de2362d3Smrg    END_BATCH();
1227de2362d3Smrg
1228de2362d3Smrg    /* DB */
1229de2362d3Smrg    BEGIN_BATCH(3 + 2);
1230de2362d3Smrg    EREG(DB_Z_INFO,                           0);
1231de2362d3Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1232de2362d3Smrg    END_BATCH();
1233de2362d3Smrg
1234de2362d3Smrg    BEGIN_BATCH(3 + 2);
1235de2362d3Smrg    EREG(DB_STENCIL_INFO,                     0);
1236de2362d3Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1237de2362d3Smrg    END_BATCH();
1238de2362d3Smrg
1239de2362d3Smrg    BEGIN_BATCH(3 + 2);
1240de2362d3Smrg    EREG(DB_HTILE_DATA_BASE,                    0);
1241de2362d3Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1242de2362d3Smrg    END_BATCH();
1243de2362d3Smrg
1244de2362d3Smrg    BEGIN_BATCH(49);
1245de2362d3Smrg    EREG(DB_DEPTH_CONTROL,                    0);
1246de2362d3Smrg
1247de2362d3Smrg    PACK0(PA_SC_VPORT_ZMIN_0, 2);
1248de2362d3Smrg    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
1249de2362d3Smrg    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
1250de2362d3Smrg
1251de2362d3Smrg    PACK0(DB_RENDER_CONTROL, 5);
1252de2362d3Smrg    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
1253de2362d3Smrg    E32(0); // DB_COUNT_CONTROL
1254de2362d3Smrg    E32(0); // DB_DEPTH_VIEW
1255de2362d3Smrg    E32(0x2a); // DB_RENDER_OVERRIDE
1256de2362d3Smrg    E32(0); // DB_RENDER_OVERRIDE2
1257de2362d3Smrg
1258de2362d3Smrg    PACK0(DB_STENCIL_CLEAR, 2);
1259de2362d3Smrg    E32(0); // DB_STENCIL_CLEAR
1260de2362d3Smrg    E32(0); // DB_DEPTH_CLEAR
1261de2362d3Smrg
1262de2362d3Smrg    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
1263de2362d3Smrg					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
1264de2362d3Smrg					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
1265de2362d3Smrg					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
1266de2362d3Smrg
1267de2362d3Smrg    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
1268de2362d3Smrg			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1269de2362d3Smrg
1270de2362d3Smrg    // SX
1271de2362d3Smrg    EREG(SX_MISC,               0);
1272de2362d3Smrg
1273de2362d3Smrg    // CB
1274de2362d3Smrg    PACK0(SX_ALPHA_TEST_CONTROL, 5);
1275de2362d3Smrg    E32(0); // SX_ALPHA_TEST_CONTROL
1276de2362d3Smrg    E32(0x00000000); //CB_BLEND_RED
1277de2362d3Smrg    E32(0x00000000); //CB_BLEND_GREEN
1278de2362d3Smrg    E32(0x00000000); //CB_BLEND_BLUE
1279de2362d3Smrg    E32(0x00000000); //CB_BLEND_ALPHA
1280de2362d3Smrg
1281de2362d3Smrg    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
1282de2362d3Smrg
1283de2362d3Smrg    // SC
1284de2362d3Smrg    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1285de2362d3Smrg					       (0 << WINDOW_Y_OFFSET_shift)));
1286de2362d3Smrg    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1287de2362d3Smrg    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
1288de2362d3Smrg    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1289de2362d3Smrg    END_BATCH();
1290de2362d3Smrg
1291de2362d3Smrg    /* clip boolean is set to always visible -> doesn't matter */
1292de2362d3Smrg    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1293de2362d3Smrg	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1294de2362d3Smrg
1295de2362d3Smrg    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1296de2362d3Smrg	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1297de2362d3Smrg
1298de2362d3Smrg    BEGIN_BATCH(57);
1299de2362d3Smrg    PACK0(PA_SC_MODE_CNTL_0, 2);
1300de2362d3Smrg    E32(0); // PA_SC_MODE_CNTL_0
1301de2362d3Smrg    E32(0); // PA_SC_MODE_CNTL_1
1302de2362d3Smrg
1303de2362d3Smrg    PACK0(PA_SC_LINE_CNTL, 16);
1304de2362d3Smrg    E32(0); // PA_SC_LINE_CNTL
1305de2362d3Smrg    E32(0); // PA_SC_AA_CONFIG
1306de2362d3Smrg    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1307de2362d3Smrg	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1308de2362d3Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1309de2362d3Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1310de2362d3Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1311de2362d3Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1312de2362d3Smrg    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1313de2362d3Smrg    E32(0);
1314de2362d3Smrg    E32(0);
1315de2362d3Smrg    E32(0);
1316de2362d3Smrg    E32(0);
1317de2362d3Smrg    E32(0);
1318de2362d3Smrg    E32(0);
1319de2362d3Smrg    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1320de2362d3Smrg    E32(0xFFFFFFFF); // PA_SC_AA_MASK
1321de2362d3Smrg
1322de2362d3Smrg    // CL
1323de2362d3Smrg    PACK0(PA_CL_CLIP_CNTL, 8);
1324de2362d3Smrg    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1325de2362d3Smrg    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1326de2362d3Smrg    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1327de2362d3Smrg    E32(0); // PA_CL_VS_OUT_CNTL
1328de2362d3Smrg    E32(0); // PA_CL_NANINF_CNTL
1329de2362d3Smrg    E32(0); // PA_SU_LINE_STIPPLE_CNTL
1330de2362d3Smrg    E32(0); // PA_SU_LINE_STIPPLE_SCALE
1331de2362d3Smrg    E32(0); // PA_SU_PRIM_FILTER_CNTL
1332de2362d3Smrg
1333de2362d3Smrg    // SU
1334de2362d3Smrg    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1335de2362d3Smrg    E32(0);
1336de2362d3Smrg    E32(0);
1337de2362d3Smrg    E32(0);
1338de2362d3Smrg    E32(0);
1339de2362d3Smrg    E32(0);
1340de2362d3Smrg    E32(0);
1341de2362d3Smrg
1342de2362d3Smrg    /* src = semantic id 0; mask = semantic id 1 */
1343de2362d3Smrg    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1344de2362d3Smrg			   (1 << SEMANTIC_1_shift)));
1345de2362d3Smrg    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1346de2362d3Smrg    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1347de2362d3Smrg    E32(((0    << SEMANTIC_shift)	|
1348de2362d3Smrg	 (0x01 << DEFAULT_VAL_shift)));
1349de2362d3Smrg    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1350de2362d3Smrg    E32(((1    << SEMANTIC_shift)	|
1351de2362d3Smrg	 (0x01 << DEFAULT_VAL_shift)));
1352de2362d3Smrg
1353de2362d3Smrg    PACK0(SPI_INPUT_Z, 8);
1354de2362d3Smrg    E32(0); // SPI_INPUT_Z
1355de2362d3Smrg    E32(0); // SPI_FOG_CNTL
1356de2362d3Smrg    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1357de2362d3Smrg    E32(0); // SPI_PS_IN_CONTROL_2
1358de2362d3Smrg    E32(0);
1359de2362d3Smrg    E32(0);
1360de2362d3Smrg    E32(0);
1361de2362d3Smrg    E32(0);
1362de2362d3Smrg    END_BATCH();
1363de2362d3Smrg
1364de2362d3Smrg    // clear FS
1365de2362d3Smrg    fs_conf.bo = accel_state->shaders_bo;
1366de2362d3Smrg    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1367de2362d3Smrg
1368de2362d3Smrg    // VGT
1369de2362d3Smrg    BEGIN_BATCH(46);
1370de2362d3Smrg
1371de2362d3Smrg    PACK0(VGT_MAX_VTX_INDX, 4);
1372de2362d3Smrg    E32(0xffffff);
1373de2362d3Smrg    E32(0);
1374de2362d3Smrg    E32(0);
1375de2362d3Smrg    E32(0);
1376de2362d3Smrg
1377de2362d3Smrg    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1378de2362d3Smrg    E32(0);
1379de2362d3Smrg    E32(0);
1380de2362d3Smrg
1381de2362d3Smrg    PACK0(VGT_REUSE_OFF, 2);
1382de2362d3Smrg    E32(0);
1383de2362d3Smrg    E32(0);
1384de2362d3Smrg
1385de2362d3Smrg    PACK0(PA_SU_POINT_SIZE, 17);
1386de2362d3Smrg    E32(0); // PA_SU_POINT_SIZE
1387de2362d3Smrg    E32(0); // PA_SU_POINT_MINMAX
1388de2362d3Smrg    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1389de2362d3Smrg    E32(0); // PA_SC_LINE_STIPPLE
1390de2362d3Smrg    E32(0); // VGT_OUTPUT_PATH_CNTL
1391de2362d3Smrg    E32(0); // VGT_HOS_CNTL
1392de2362d3Smrg    E32(0);
1393de2362d3Smrg    E32(0);
1394de2362d3Smrg    E32(0);
1395de2362d3Smrg    E32(0);
1396de2362d3Smrg    E32(0);
1397de2362d3Smrg    E32(0);
1398de2362d3Smrg    E32(0);
1399de2362d3Smrg    E32(0);
1400de2362d3Smrg    E32(0);
1401de2362d3Smrg    E32(0);
1402de2362d3Smrg    E32(0); // VGT_GS_MODE
1403de2362d3Smrg
1404de2362d3Smrg    EREG(VGT_PRIMITIVEID_EN,                  0);
1405de2362d3Smrg    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1406de2362d3Smrg    EREG(VGT_SHADER_STAGES_EN,          0);
1407de2362d3Smrg
1408de2362d3Smrg    PACK0(VGT_STRMOUT_CONFIG, 2);
1409de2362d3Smrg    E32(0);
1410de2362d3Smrg    E32(0);
1411de2362d3Smrg    END_BATCH();
1412de2362d3Smrg}
1413de2362d3Smrg
1414de2362d3Smrg
1415de2362d3Smrg/*
1416de2362d3Smrg * Commands
1417de2362d3Smrg */
1418de2362d3Smrg
1419de2362d3Smrgvoid
1420de2362d3Smrgevergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1421de2362d3Smrg{
1422de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1423de2362d3Smrg
1424de2362d3Smrg    BEGIN_BATCH(10);
1425de2362d3Smrg    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1426de2362d3Smrg    PACK3(IT_INDEX_TYPE, 1);
1427de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1428de2362d3Smrg    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1429de2362d3Smrg#else
1430de2362d3Smrg    E32(draw_conf->index_type);
1431de2362d3Smrg#endif
1432de2362d3Smrg    PACK3(IT_NUM_INSTANCES, 1);
1433de2362d3Smrg    E32(draw_conf->num_instances);
1434de2362d3Smrg    PACK3(IT_DRAW_INDEX_AUTO, 2);
1435de2362d3Smrg    E32(draw_conf->num_indices);
1436de2362d3Smrg    E32(draw_conf->vgt_draw_initiator);
1437de2362d3Smrg    END_BATCH();
1438de2362d3Smrg}
1439de2362d3Smrg
1440de2362d3Smrgvoid evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1441de2362d3Smrg{
1442de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1443de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1444de2362d3Smrg    draw_config_t   draw_conf;
1445de2362d3Smrg    vtx_resource_t  vtx_res;
1446de2362d3Smrg
1447de2362d3Smrg    if (accel_state->vbo.vb_start_op == -1)
1448de2362d3Smrg      return;
1449de2362d3Smrg
1450de2362d3Smrg    CLEAR (draw_conf);
1451de2362d3Smrg    CLEAR (vtx_res);
1452de2362d3Smrg
1453de2362d3Smrg    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1454de2362d3Smrg	radeon_ib_discard(pScrn);
1455de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
1456de2362d3Smrg	return;
1457de2362d3Smrg    }
1458de2362d3Smrg
1459de2362d3Smrg    /* Vertex buffer setup */
1460de2362d3Smrg    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1461de2362d3Smrg    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
1462de2362d3Smrg    vtx_res.vtx_size_dw     = vtx_size / 4;
1463de2362d3Smrg    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
146418781e08Smrg    vtx_res.vb_addr         = accel_state->vbo.vb_start_op;
1465de2362d3Smrg    vtx_res.bo              = accel_state->vbo.vb_bo;
1466de2362d3Smrg    vtx_res.dst_sel_x       = SQ_SEL_X;
1467de2362d3Smrg    vtx_res.dst_sel_y       = SQ_SEL_Y;
1468de2362d3Smrg    vtx_res.dst_sel_z       = SQ_SEL_Z;
1469de2362d3Smrg    vtx_res.dst_sel_w       = SQ_SEL_W;
1470de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1471de2362d3Smrg    vtx_res.endian          = SQ_ENDIAN_8IN32;
1472de2362d3Smrg#endif
1473de2362d3Smrg    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1474de2362d3Smrg
1475de2362d3Smrg    /* Draw */
1476de2362d3Smrg    draw_conf.prim_type          = DI_PT_RECTLIST;
1477de2362d3Smrg    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1478de2362d3Smrg    draw_conf.num_instances      = 1;
1479de2362d3Smrg    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1480de2362d3Smrg    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1481de2362d3Smrg
1482de2362d3Smrg    evergreen_draw_auto(pScrn, &draw_conf);
1483de2362d3Smrg
1484de2362d3Smrg    /* sync dst surface */
1485de2362d3Smrg    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
148618781e08Smrg				  accel_state->dst_size, 0,
1487de2362d3Smrg				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1488de2362d3Smrg
1489de2362d3Smrg    accel_state->vbo.vb_start_op = -1;
1490de2362d3Smrg    accel_state->cbuf.vb_start_op = -1;
1491de2362d3Smrg    accel_state->ib_reset_op = 0;
1492de2362d3Smrg
1493de2362d3Smrg}
1494de2362d3Smrg
1495