1921a55d8Smrg/*
2921a55d8Smrg * Copyright 2010 Advanced Micro Devices, Inc.
3921a55d8Smrg *
4921a55d8Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5921a55d8Smrg * copy of this software and associated documentation files (the "Software"),
6921a55d8Smrg * to deal in the Software without restriction, including without limitation
7921a55d8Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8921a55d8Smrg * and/or sell copies of the Software, and to permit persons to whom the
9921a55d8Smrg * Software is furnished to do so, subject to the following conditions:
10921a55d8Smrg *
11921a55d8Smrg * The above copyright notice and this permission notice (including the next
12921a55d8Smrg * paragraph) shall be included in all copies or substantial portions of the
13921a55d8Smrg * Software.
14921a55d8Smrg *
15921a55d8Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16921a55d8Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17921a55d8Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18921a55d8Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19921a55d8Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20921a55d8Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21921a55d8Smrg * SOFTWARE.
22921a55d8Smrg *
23921a55d8Smrg * Authors: Alex Deucher <alexander.deucher@amd.com>
24921a55d8Smrg *
25921a55d8Smrg */
26921a55d8Smrg#ifdef HAVE_CONFIG_H
27921a55d8Smrg#include "config.h"
28921a55d8Smrg#endif
29921a55d8Smrg
3043df4709Smrg#ifdef XF86DRM_MODE
3143df4709Smrg
32921a55d8Smrg#include "xf86.h"
33921a55d8Smrg
34921a55d8Smrg#include <errno.h>
35921a55d8Smrg
36921a55d8Smrg#include "radeon.h"
37921a55d8Smrg#include "evergreen_shader.h"
38921a55d8Smrg#include "radeon_reg.h"
39921a55d8Smrg#include "evergreen_reg.h"
40921a55d8Smrg#include "evergreen_state.h"
41921a55d8Smrg
4243df4709Smrg#include "radeon_drm.h"
43921a55d8Smrg#include "radeon_vbo.h"
44921a55d8Smrg#include "radeon_exa_shared.h"
45921a55d8Smrg
46b13dfe66Smrgstatic const uint32_t EVERGREEN_ROP[16] = {
47b13dfe66Smrg    RADEON_ROP3_ZERO, /* GXclear        */
48b13dfe66Smrg    RADEON_ROP3_DSa,  /* Gxand          */
49b13dfe66Smrg    RADEON_ROP3_SDna, /* GXandReverse   */
50b13dfe66Smrg    RADEON_ROP3_S,    /* GXcopy         */
51b13dfe66Smrg    RADEON_ROP3_DSna, /* GXandInverted  */
52b13dfe66Smrg    RADEON_ROP3_D,    /* GXnoop         */
53b13dfe66Smrg    RADEON_ROP3_DSx,  /* GXxor          */
54b13dfe66Smrg    RADEON_ROP3_DSo,  /* GXor           */
55b13dfe66Smrg    RADEON_ROP3_DSon, /* GXnor          */
56b13dfe66Smrg    RADEON_ROP3_DSxn, /* GXequiv        */
57b13dfe66Smrg    RADEON_ROP3_Dn,   /* GXinvert       */
58b13dfe66Smrg    RADEON_ROP3_SDno, /* GXorReverse    */
59b13dfe66Smrg    RADEON_ROP3_Sn,   /* GXcopyInverted */
60b13dfe66Smrg    RADEON_ROP3_DSno, /* GXorInverted   */
61b13dfe66Smrg    RADEON_ROP3_DSan, /* GXnand         */
62b13dfe66Smrg    RADEON_ROP3_ONE,  /* GXset          */
63b13dfe66Smrg};
64b13dfe66Smrg
65921a55d8Smrgvoid
66921a55d8Smrgevergreen_start_3d(ScrnInfoPtr pScrn)
67921a55d8Smrg{
68921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
69921a55d8Smrg
70921a55d8Smrg    BEGIN_BATCH(3);
71921a55d8Smrg    PACK3(IT_CONTEXT_CONTROL, 2);
72921a55d8Smrg    E32(0x80000000);
73921a55d8Smrg    E32(0x80000000);
74921a55d8Smrg    END_BATCH();
75921a55d8Smrg
76921a55d8Smrg}
77921a55d8Smrg
/*
 * Translate a tile-split size into its register field encoding:
 * 64 -> 0, 128 -> 1, 256 -> 2, 512 -> 3, 1024 -> 4, 2048 -> 5, 4096 -> 6.
 * Any other value is treated like 4096 and yields 6.
 */
unsigned eg_tile_split(unsigned tile_split)
{
	unsigned code;

	/* 64 << code walks 64, 128, ..., 2048 */
	for (code = 0; code < 6; code++) {
		if (tile_split == (64u << code))
			return code;
	}

	/* 4096 and every unrecognised value */
	return 6;
}
9240732134Srjs
/*
 * Translate a macro tile aspect (1, 2, 4 or 8) into its register field
 * encoding (0..3).  Unrecognised values are treated like 1 and yield 0.
 */
static unsigned eg_macro_tile_aspect(unsigned macro_tile_aspect)
{
	switch (macro_tile_aspect) {
	case 2:
		return 1;
	case 4:
		return 2;
	case 8:
		return 3;
	case 1:
	default:
		return 0;
	}
}
10440732134Srjs
/*
 * Translate a bank width or height (1, 2, 4 or 8) into its register field
 * encoding (0..3).  Unrecognised values are treated like 1 and yield 0.
 */
static unsigned eg_bank_wh(unsigned bankwh)
{
	unsigned code;

	/* 1 << code walks 2, 4, 8; a value of 1 falls through to the default */
	for (code = 1; code <= 3; code++) {
		if (bankwh == (1u << code))
			return code;
	}

	return 0;
}
11640732134Srjs
/*
 * Translate a bank count (2, 4, 8 or 16) into its register field encoding
 * (0..3).  Unrecognised values are treated like 2 and yield 0.
 */
static unsigned eg_nbanks(unsigned nbanks)
{
	switch (nbanks) {
	case 4:
		return 1;
	case 8:
		return 2;
	case 16:
		return 3;
	case 2:
	default:
		return 0;
	}
}
12840732134Srjs
129921a55d8Smrg/*
130921a55d8Smrg * Setup of functional groups
131921a55d8Smrg */
132921a55d8Smrg
133921a55d8Smrg// asic stack/thread/gpr limits - need to query the drm
134921a55d8Smrgstatic void
135921a55d8Smrgevergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
136921a55d8Smrg{
137921a55d8Smrg    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
138921a55d8Smrg    uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
139921a55d8Smrg    uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
140921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
141921a55d8Smrg
142921a55d8Smrg    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
143921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_PALM) ||
14440732134Srjs	(info->ChipFamily == CHIP_FAMILY_SUMO) ||
14540732134Srjs	(info->ChipFamily == CHIP_FAMILY_SUMO2) ||
146921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_CAICOS))
147921a55d8Smrg	sq_config = 0;
148921a55d8Smrg    else
149921a55d8Smrg	sq_config = VC_ENABLE_bit;
150921a55d8Smrg
151921a55d8Smrg    sq_config |= (EXPORT_SRC_C_bit |
152921a55d8Smrg		  (sq_conf->cs_prio << CS_PRIO_shift) |
153921a55d8Smrg		  (sq_conf->ls_prio << LS_PRIO_shift) |
154921a55d8Smrg		  (sq_conf->hs_prio << HS_PRIO_shift) |
155921a55d8Smrg		  (sq_conf->ps_prio << PS_PRIO_shift) |
156921a55d8Smrg		  (sq_conf->vs_prio << VS_PRIO_shift) |
157921a55d8Smrg		  (sq_conf->gs_prio << GS_PRIO_shift) |
158921a55d8Smrg		  (sq_conf->es_prio << ES_PRIO_shift));
159921a55d8Smrg
160921a55d8Smrg    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
161921a55d8Smrg			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
162921a55d8Smrg			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
163921a55d8Smrg    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
164921a55d8Smrg			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
165921a55d8Smrg    sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
166921a55d8Smrg			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
167921a55d8Smrg
168921a55d8Smrg    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
169921a55d8Smrg			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
170921a55d8Smrg			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
171921a55d8Smrg			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
172921a55d8Smrg    sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
173921a55d8Smrg				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
174921a55d8Smrg
175921a55d8Smrg    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
176921a55d8Smrg				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
177921a55d8Smrg
178921a55d8Smrg    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
179921a55d8Smrg				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
180921a55d8Smrg
181921a55d8Smrg    sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
182921a55d8Smrg				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
183921a55d8Smrg
184921a55d8Smrg    BEGIN_BATCH(16);
185921a55d8Smrg    /* disable dyn gprs */
186921a55d8Smrg    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
187921a55d8Smrg    PACK0(SQ_CONFIG, 4);
188921a55d8Smrg    E32(sq_config);
189921a55d8Smrg    E32(sq_gpr_resource_mgmt_1);
190921a55d8Smrg    E32(sq_gpr_resource_mgmt_2);
191921a55d8Smrg    E32(sq_gpr_resource_mgmt_3);
192921a55d8Smrg    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
193921a55d8Smrg    E32(sq_thread_resource_mgmt);
194921a55d8Smrg    E32(sq_thread_resource_mgmt_2);
195921a55d8Smrg    E32(sq_stack_resource_mgmt_1);
196921a55d8Smrg    E32(sq_stack_resource_mgmt_2);
197921a55d8Smrg    E32(sq_stack_resource_mgmt_3);
198921a55d8Smrg    END_BATCH();
199921a55d8Smrg}
200921a55d8Smrg
201c4ae5be6Smrg/* cayman has some minor differences in CB_COLOR*_INFO and _ATTRIB, but none that
202c4ae5be6Smrg * we use here.
203c4ae5be6Smrg */
204921a55d8Smrgvoid
205921a55d8Smrgevergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
206921a55d8Smrg{
207b13dfe66Smrg    uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim;
20840732134Srjs    unsigned pitch, slice, w, h, array_mode, nbanks;
20940732134Srjs    uint32_t tile_split, macro_aspect, bankw, bankh;
210921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
211921a55d8Smrg
21243df4709Smrg#if defined(XF86DRM_MODE)
21340732134Srjs    if (cb_conf->surface) {
21440732134Srjs	switch (cb_conf->surface->level[0].mode) {
21540732134Srjs	case RADEON_SURF_MODE_1D:
21640732134Srjs		array_mode = 2;
21740732134Srjs		break;
21840732134Srjs	case RADEON_SURF_MODE_2D:
21940732134Srjs		array_mode = 4;
22040732134Srjs		break;
22140732134Srjs	default:
22240732134Srjs		array_mode = 0;
22340732134Srjs		break;
22440732134Srjs	}
22540732134Srjs	w = cb_conf->surface->level[0].npix_x;
22640732134Srjs	h = cb_conf->surface->level[0].npix_y;
22740732134Srjs	pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
22840732134Srjs	slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
22940732134Srjs	tile_split = cb_conf->surface->tile_split;
23040732134Srjs	macro_aspect = cb_conf->surface->mtilea;
23140732134Srjs	bankw = cb_conf->surface->bankw;
23240732134Srjs	bankh = cb_conf->surface->bankh;
23340732134Srjs	tile_split = eg_tile_split(tile_split);
23440732134Srjs	macro_aspect = eg_macro_tile_aspect(macro_aspect);
23540732134Srjs	bankw = eg_bank_wh(bankw);
23640732134Srjs	bankh = eg_bank_wh(bankh);
23743df4709Smrg    } else
23843df4709Smrg#endif
23943df4709Smrg    {
24040732134Srjs	pitch = (cb_conf->w / 8) - 1;
24140732134Srjs	h = RADEON_ALIGN(cb_conf->h, 8);
24240732134Srjs	slice = ((cb_conf->w * h) / 64) - 1;
24340732134Srjs	array_mode = cb_conf->array_mode;
24440732134Srjs	w = cb_conf->w;
24540732134Srjs	tile_split = 4;
24640732134Srjs	macro_aspect = 0;
24740732134Srjs	bankw = 0;
24840732134Srjs	bankh = 0;
24940732134Srjs    }
25040732134Srjs    nbanks = info->num_banks;
25140732134Srjs    nbanks = eg_nbanks(nbanks);
25240732134Srjs
25340732134Srjs    cb_color_attrib |= (tile_split << CB_COLOR0_ATTRIB__TILE_SPLIT_shift)|
25440732134Srjs		       (nbanks << CB_COLOR0_ATTRIB__NUM_BANKS_shift) |
25540732134Srjs		       (bankw << CB_COLOR0_ATTRIB__BANK_WIDTH_shift) |
25640732134Srjs		       (bankh << CB_COLOR0_ATTRIB__BANK_HEIGHT_shift) |
25740732134Srjs		       (macro_aspect << CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift);
258921a55d8Smrg    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
259921a55d8Smrg		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
26040732134Srjs		     (array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
261921a55d8Smrg		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
262921a55d8Smrg		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
263921a55d8Smrg		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
264921a55d8Smrg		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
265921a55d8Smrg    if (cb_conf->blend_clamp)
266921a55d8Smrg	cb_color_info |= BLEND_CLAMP_bit;
267921a55d8Smrg    if (cb_conf->fast_clear)
268921a55d8Smrg	cb_color_info |= FAST_CLEAR_bit;
269921a55d8Smrg    if (cb_conf->compression)
270921a55d8Smrg	cb_color_info |= COMPRESSION_bit;
271921a55d8Smrg    if (cb_conf->blend_bypass)
272921a55d8Smrg	cb_color_info |= BLEND_BYPASS_bit;
273921a55d8Smrg    if (cb_conf->simple_float)
274921a55d8Smrg	cb_color_info |= SIMPLE_FLOAT_bit;
275921a55d8Smrg    if (cb_conf->round_mode)
276921a55d8Smrg	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
277921a55d8Smrg    if (cb_conf->tile_compact)
278921a55d8Smrg	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
279921a55d8Smrg    if (cb_conf->rat)
280921a55d8Smrg	cb_color_info |= RAT_bit;
281921a55d8Smrg
282921a55d8Smrg    /* bit 4 needs to be set for linear and depth/stencil surfaces */
283b13dfe66Smrg    if (cb_conf->non_disp_tiling)
284b13dfe66Smrg	cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
285921a55d8Smrg
286921a55d8Smrg    switch (cb_conf->resource_type) {
287921a55d8Smrg    case BUFFER:
288921a55d8Smrg	/* number of elements in the surface */
289921a55d8Smrg	cb_color_dim = pitch * slice;
290921a55d8Smrg	break;
291921a55d8Smrg    default:
292921a55d8Smrg	/* w/h of the surface */
29340732134Srjs	cb_color_dim = (((w - 1) << WIDTH_MAX_shift) |
294921a55d8Smrg			((cb_conf->h - 1) << HEIGHT_MAX_shift));
295921a55d8Smrg	break;
296921a55d8Smrg    }
297921a55d8Smrg
298921a55d8Smrg    BEGIN_BATCH(3 + 2);
299921a55d8Smrg    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
300921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
301921a55d8Smrg    END_BATCH();
302921a55d8Smrg
303921a55d8Smrg    /* Set CMASK & FMASK buffer to the offset of color buffer as
304921a55d8Smrg     * we don't use those this shouldn't cause any issue and we
305921a55d8Smrg     * then have a valid cmd stream
306921a55d8Smrg     */
307921a55d8Smrg    BEGIN_BATCH(3 + 2);
308921a55d8Smrg    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
309921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
310921a55d8Smrg    END_BATCH();
311921a55d8Smrg    BEGIN_BATCH(3 + 2);
312921a55d8Smrg    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
313921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
314921a55d8Smrg    END_BATCH();
315921a55d8Smrg
316921a55d8Smrg    /* tiling config */
317921a55d8Smrg    BEGIN_BATCH(3 + 2);
318921a55d8Smrg    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
319921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
320921a55d8Smrg    END_BATCH();
321921a55d8Smrg    BEGIN_BATCH(3 + 2);
322921a55d8Smrg    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
323921a55d8Smrg    RELOC_BATCH(cb_conf->bo, 0, domain);
324921a55d8Smrg    END_BATCH();
325921a55d8Smrg
326b13dfe66Smrg    BEGIN_BATCH(33);
327921a55d8Smrg    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
328921a55d8Smrg    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
329921a55d8Smrg    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
330921a55d8Smrg    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
331921a55d8Smrg    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
332921a55d8Smrg    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
333921a55d8Smrg    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
334921a55d8Smrg    E32(0);
335921a55d8Smrg    E32(0);
336921a55d8Smrg    E32(0);
337921a55d8Smrg    E32(0);
338b13dfe66Smrg    EREG(CB_TARGET_MASK,                      (cb_conf->pmask << TARGET0_ENABLE_shift));
339b13dfe66Smrg    EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[cb_conf->rop] |
340b13dfe66Smrg					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
341b13dfe66Smrg    EREG(CB_BLEND0_CONTROL,                   cb_conf->blendcntl);
342921a55d8Smrg    END_BATCH();
343b13dfe66Smrg
344921a55d8Smrg}
345921a55d8Smrg
346921a55d8Smrgstatic void
347921a55d8Smrgevergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
348921a55d8Smrg			      uint32_t size, uint64_t mc_addr,
349921a55d8Smrg			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
350921a55d8Smrg{
351921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
352921a55d8Smrg    uint32_t cp_coher_size;
353921a55d8Smrg    if (size == 0xffffffff)
354921a55d8Smrg	cp_coher_size = 0xffffffff;
355921a55d8Smrg    else
356921a55d8Smrg	cp_coher_size = ((size + 255) >> 8);
357921a55d8Smrg
358921a55d8Smrg    BEGIN_BATCH(5 + 2);
359921a55d8Smrg    PACK3(IT_SURFACE_SYNC, 4);
360921a55d8Smrg    E32(sync_type);
361921a55d8Smrg    E32(cp_coher_size);
362921a55d8Smrg    E32((mc_addr >> 8));
363921a55d8Smrg    E32(10); /* poll interval */
364921a55d8Smrg    RELOC_BATCH(bo, rdomains, wdomain);
365921a55d8Smrg    END_BATCH();
366921a55d8Smrg}
367921a55d8Smrg
368921a55d8Smrg/* inserts a wait for vline in the command stream */
369921a55d8Smrgvoid evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
370921a55d8Smrg				  xf86CrtcPtr crtc, int start, int stop)
371921a55d8Smrg{
372921a55d8Smrg    RADEONInfoPtr  info = RADEONPTR(pScrn);
373921a55d8Smrg    drmmode_crtc_private_ptr drmmode_crtc;
37443df4709Smrg    uint32_t offset;
375921a55d8Smrg
376921a55d8Smrg    if (!crtc)
377921a55d8Smrg        return;
378921a55d8Smrg
379921a55d8Smrg    drmmode_crtc = crtc->driver_private;
380921a55d8Smrg
381921a55d8Smrg    if (!crtc->enabled)
382921a55d8Smrg        return;
383921a55d8Smrg
38443df4709Smrg    if (info->cs) {
38543df4709Smrg        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
38643df4709Smrg	    return;
38743df4709Smrg    } else {
38843df4709Smrg#ifdef USE_EXA
38943df4709Smrg	if (info->useEXA)
39043df4709Smrg	    offset = exaGetPixmapOffset(pPix);
39143df4709Smrg	else
39243df4709Smrg#endif
39343df4709Smrg	    offset = pPix->devPrivate.ptr - info->FB;
39443df4709Smrg
39543df4709Smrg	/* if drawing to front buffer */
39643df4709Smrg	if (offset != 0)
39743df4709Smrg	    return;
39843df4709Smrg    }
399921a55d8Smrg
40040732134Srjs    start = max(start, crtc->y);
40140732134Srjs    stop = min(stop, crtc->y + crtc->mode.VDisplay);
402921a55d8Smrg
40340732134Srjs    if (start >= stop)
404921a55d8Smrg        return;
405921a55d8Smrg
406921a55d8Smrg    BEGIN_BATCH(11);
407921a55d8Smrg    /* set the VLINE range */
408921a55d8Smrg    EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
409921a55d8Smrg	 (start << EVERGREEN_VLINE_START_SHIFT) |
410921a55d8Smrg	 (stop << EVERGREEN_VLINE_END_SHIFT));
411921a55d8Smrg
412921a55d8Smrg    /* tell the CP to poll the VLINE state register */
413921a55d8Smrg    PACK3(IT_WAIT_REG_MEM, 6);
414921a55d8Smrg    E32(IT_WAIT_REG | IT_WAIT_EQ);
415921a55d8Smrg    E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
416921a55d8Smrg    E32(0);
417921a55d8Smrg    E32(0);                          // Ref value
418921a55d8Smrg    E32(EVERGREEN_VLINE_STAT);    // Mask
419921a55d8Smrg    E32(10);                         // Wait interval
420921a55d8Smrg    /* add crtc reloc */
421921a55d8Smrg    PACK3(IT_NOP, 1);
422921a55d8Smrg    E32(drmmode_crtc->mode_crtc->crtc_id);
423921a55d8Smrg    END_BATCH();
424921a55d8Smrg}
425921a55d8Smrg
426b13dfe66Smrgvoid
427b13dfe66Smrgevergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
428b13dfe66Smrg{
429b13dfe66Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
430b13dfe66Smrg
431b13dfe66Smrg    BEGIN_BATCH(8);
432b13dfe66Smrg    /* Interpolator setup */
433b13dfe66Smrg    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
434b13dfe66Smrg    PACK0(SPI_PS_IN_CONTROL_0, 3);
435b13dfe66Smrg    E32(((num_interp << NUM_INTERP_shift) |
436b13dfe66Smrg	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
437b13dfe66Smrg    E32(0); // SPI_PS_IN_CONTROL_1
438b13dfe66Smrg    E32(0); // SPI_INTERP_CONTROL_0
439b13dfe66Smrg    END_BATCH();
440b13dfe66Smrg}
441b13dfe66Smrg
442921a55d8Smrgvoid
443921a55d8Smrgevergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
444921a55d8Smrg{
445921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
446921a55d8Smrg    uint32_t sq_pgm_resources;
447921a55d8Smrg
448921a55d8Smrg    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
449921a55d8Smrg			(fs_conf->stack_size << STACK_SIZE_shift));
450921a55d8Smrg
451921a55d8Smrg    if (fs_conf->dx10_clamp)
452921a55d8Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
453921a55d8Smrg
454921a55d8Smrg    BEGIN_BATCH(3 + 2);
455921a55d8Smrg    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
456921a55d8Smrg    RELOC_BATCH(fs_conf->bo, domain, 0);
457921a55d8Smrg    END_BATCH();
458921a55d8Smrg
459921a55d8Smrg    BEGIN_BATCH(3);
460921a55d8Smrg    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
461921a55d8Smrg    END_BATCH();
462921a55d8Smrg}
463921a55d8Smrg
464c4ae5be6Smrg/* cayman has some minor differences in SQ_PGM_RESOUCES_VS and _RESOURCES_2_VS,
465c4ae5be6Smrg * but none that we use here.
466c4ae5be6Smrg */
467921a55d8Smrgvoid
468921a55d8Smrgevergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
469921a55d8Smrg{
470921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
471921a55d8Smrg    uint32_t sq_pgm_resources, sq_pgm_resources_2;
472921a55d8Smrg
473921a55d8Smrg    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
474921a55d8Smrg			(vs_conf->stack_size << STACK_SIZE_shift));
475921a55d8Smrg
476921a55d8Smrg    if (vs_conf->dx10_clamp)
477921a55d8Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
478921a55d8Smrg    if (vs_conf->uncached_first_inst)
479921a55d8Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
480921a55d8Smrg
481921a55d8Smrg    sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
482921a55d8Smrg			  (vs_conf->double_round << DOUBLE_ROUND_shift));
483921a55d8Smrg
484921a55d8Smrg    if (vs_conf->allow_sdi)
485921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
486921a55d8Smrg    if (vs_conf->allow_sd0)
487921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
488921a55d8Smrg    if (vs_conf->allow_ddi)
489921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
490921a55d8Smrg    if (vs_conf->allow_ddo)
491921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
492921a55d8Smrg
493921a55d8Smrg    /* flush SQ cache */
494921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
495921a55d8Smrg				  vs_conf->shader_size, vs_conf->shader_addr,
496921a55d8Smrg				  vs_conf->bo, domain, 0);
497921a55d8Smrg
498921a55d8Smrg    BEGIN_BATCH(3 + 2);
499921a55d8Smrg    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
500921a55d8Smrg    RELOC_BATCH(vs_conf->bo, domain, 0);
501921a55d8Smrg    END_BATCH();
502921a55d8Smrg
503921a55d8Smrg    BEGIN_BATCH(4);
504921a55d8Smrg    PACK0(SQ_PGM_RESOURCES_VS, 2);
505921a55d8Smrg    E32(sq_pgm_resources);
506921a55d8Smrg    E32(sq_pgm_resources_2);
507921a55d8Smrg    END_BATCH();
508921a55d8Smrg}
509921a55d8Smrg
510c4ae5be6Smrg/* cayman has some minor differences in SQ_PGM_RESOUCES_PS and _RESOURCES_2_PS,
511c4ae5be6Smrg * but none that we use here.
512c4ae5be6Smrg */
513921a55d8Smrgvoid
514921a55d8Smrgevergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
515921a55d8Smrg{
516921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
517921a55d8Smrg    uint32_t sq_pgm_resources, sq_pgm_resources_2;
518921a55d8Smrg
519921a55d8Smrg    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
520921a55d8Smrg			(ps_conf->stack_size << STACK_SIZE_shift));
521921a55d8Smrg
522921a55d8Smrg    if (ps_conf->dx10_clamp)
523921a55d8Smrg	sq_pgm_resources |= DX10_CLAMP_bit;
524921a55d8Smrg    if (ps_conf->uncached_first_inst)
525921a55d8Smrg	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
526921a55d8Smrg    if (ps_conf->clamp_consts)
527921a55d8Smrg	sq_pgm_resources |= CLAMP_CONSTS_bit;
528921a55d8Smrg
529921a55d8Smrg    sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
530921a55d8Smrg			  (ps_conf->double_round << DOUBLE_ROUND_shift));
531921a55d8Smrg
532921a55d8Smrg    if (ps_conf->allow_sdi)
533921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
534921a55d8Smrg    if (ps_conf->allow_sd0)
535921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
536921a55d8Smrg    if (ps_conf->allow_ddi)
537921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
538921a55d8Smrg    if (ps_conf->allow_ddo)
539921a55d8Smrg	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
540921a55d8Smrg
541921a55d8Smrg    /* flush SQ cache */
542921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
543921a55d8Smrg				  ps_conf->shader_size, ps_conf->shader_addr,
544921a55d8Smrg				  ps_conf->bo, domain, 0);
545921a55d8Smrg
546921a55d8Smrg    BEGIN_BATCH(3 + 2);
547921a55d8Smrg    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
548921a55d8Smrg    RELOC_BATCH(ps_conf->bo, domain, 0);
549921a55d8Smrg    END_BATCH();
550921a55d8Smrg
551921a55d8Smrg    BEGIN_BATCH(5);
552921a55d8Smrg    PACK0(SQ_PGM_RESOURCES_PS, 3);
553921a55d8Smrg    E32(sq_pgm_resources);
554921a55d8Smrg    E32(sq_pgm_resources_2);
555921a55d8Smrg    E32(ps_conf->export_mode);
556921a55d8Smrg    END_BATCH();
557921a55d8Smrg}
558921a55d8Smrg
559921a55d8Smrgvoid
560921a55d8Smrgevergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
561921a55d8Smrg{
562921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
563921a55d8Smrg    /* size reg is units of 16 consts (4 dwords each) */
564921a55d8Smrg    uint32_t size = const_conf->size_bytes >> 8;
565921a55d8Smrg
566921a55d8Smrg    if (size == 0)
567921a55d8Smrg	size = 1;
568921a55d8Smrg
56940732134Srjs#if X_BYTE_ORDER == X_BIG_ENDIAN
57040732134Srjs    {
57143df4709Smrg	    uint32_t count = size << 4, *p = const_conf->cpu_ptr;
57240732134Srjs
57340732134Srjs	    while(count--) {
57440732134Srjs		    *p = cpu_to_le32(*p);
57540732134Srjs		    p++;
57640732134Srjs	    }
57740732134Srjs    }
57840732134Srjs#endif
57940732134Srjs
580921a55d8Smrg    /* flush SQ cache */
581921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
582921a55d8Smrg				  const_conf->size_bytes, const_conf->const_addr,
583921a55d8Smrg				  const_conf->bo, domain, 0);
584921a55d8Smrg
585921a55d8Smrg    switch (const_conf->type) {
586921a55d8Smrg    case SHADER_TYPE_VS:
587921a55d8Smrg	BEGIN_BATCH(3);
588921a55d8Smrg	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
589921a55d8Smrg	END_BATCH();
590921a55d8Smrg	BEGIN_BATCH(3 + 2);
591921a55d8Smrg	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
592921a55d8Smrg	RELOC_BATCH(const_conf->bo, domain, 0);
593921a55d8Smrg	END_BATCH();
594921a55d8Smrg	break;
595921a55d8Smrg    case SHADER_TYPE_PS:
596921a55d8Smrg	BEGIN_BATCH(3);
597921a55d8Smrg	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
598921a55d8Smrg	END_BATCH();
599921a55d8Smrg	BEGIN_BATCH(3 + 2);
600921a55d8Smrg	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
601921a55d8Smrg	RELOC_BATCH(const_conf->bo, domain, 0);
602921a55d8Smrg	END_BATCH();
603921a55d8Smrg	break;
604921a55d8Smrg    default:
605921a55d8Smrg	ErrorF("Unsupported const type %d\n", const_conf->type);
606921a55d8Smrg	break;
607921a55d8Smrg    }
608921a55d8Smrg
609921a55d8Smrg}
610921a55d8Smrg
611921a55d8Smrgvoid
612921a55d8Smrgevergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
613921a55d8Smrg{
614921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
615921a55d8Smrg    /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
616921a55d8Smrg     * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
617921a55d8Smrg     */
618921a55d8Smrg    BEGIN_BATCH(3);
619921a55d8Smrg    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
620921a55d8Smrg    END_BATCH();
621921a55d8Smrg}
622921a55d8Smrg
623c4ae5be6Smrg/* cayman has some minor differences in SQ_VTX_CONSTANT_WORD2_0 and _WORD3_0,
624c4ae5be6Smrg * but none that we use here.
625c4ae5be6Smrg */
626921a55d8Smrgstatic void
627921a55d8Smrgevergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
628921a55d8Smrg{
629921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
630921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
631921a55d8Smrg    uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
632921a55d8Smrg
633921a55d8Smrg    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
634921a55d8Smrg			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
635921a55d8Smrg			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
636921a55d8Smrg			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
637921a55d8Smrg			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
638921a55d8Smrg    if (res->clamp_x)
639921a55d8Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
640921a55d8Smrg
641921a55d8Smrg    if (res->format_comp_all)
642921a55d8Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
643921a55d8Smrg
644921a55d8Smrg    if (res->srf_mode_all)
645921a55d8Smrg	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
646921a55d8Smrg
647921a55d8Smrg    sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
648921a55d8Smrg			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
649921a55d8Smrg			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
650921a55d8Smrg			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
651921a55d8Smrg
652921a55d8Smrg    if (res->uncached)
653921a55d8Smrg	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
654921a55d8Smrg
655921a55d8Smrg    /* XXX ??? */
656921a55d8Smrg    sq_vtx_constant_word4 = 0;
657921a55d8Smrg
658921a55d8Smrg    /* flush vertex cache */
659921a55d8Smrg    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
660921a55d8Smrg	(info->ChipFamily == CHIP_FAMILY_PALM) ||
66140732134Srjs	(info->ChipFamily == CHIP_FAMILY_SUMO) ||
66240732134Srjs	(info->ChipFamily == CHIP_FAMILY_SUMO2) ||
663c4ae5be6Smrg	(info->ChipFamily == CHIP_FAMILY_CAICOS) ||
66440732134Srjs	(info->ChipFamily == CHIP_FAMILY_CAYMAN) ||
66540732134Srjs	(info->ChipFamily == CHIP_FAMILY_ARUBA))
666921a55d8Smrg	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
66743df4709Smrg				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
668921a55d8Smrg				      res->bo,
669921a55d8Smrg				      domain, 0);
670921a55d8Smrg    else
671921a55d8Smrg	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
67243df4709Smrg				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
673921a55d8Smrg				      res->bo,
674921a55d8Smrg				      domain, 0);
675921a55d8Smrg
676921a55d8Smrg    BEGIN_BATCH(10 + 2);
677921a55d8Smrg    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
678921a55d8Smrg    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
679921a55d8Smrg    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
680921a55d8Smrg    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
681921a55d8Smrg    E32(sq_vtx_constant_word3);		// 3: swizzles
682921a55d8Smrg    E32(sq_vtx_constant_word4);		// 4: num elements
683921a55d8Smrg    E32(0);							// 5: n/a
684921a55d8Smrg    E32(0);							// 6: n/a
685921a55d8Smrg    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
686921a55d8Smrg    RELOC_BATCH(res->bo, domain, 0);
687921a55d8Smrg    END_BATCH();
688921a55d8Smrg}
689921a55d8Smrg
690c4ae5be6Smrg/* cayman has some minor differences in SQ_TEX_CONSTANT_WORD0_0 and _WORD4_0,
691c4ae5be6Smrg * but none that we use here.
692c4ae5be6Smrg */
693921a55d8Smrgvoid
694921a55d8Smrgevergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
695921a55d8Smrg{
696921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
697921a55d8Smrg    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
698921a55d8Smrg    uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
69940732134Srjs    uint32_t array_mode, pitch, tile_split, macro_aspect, bankw, bankh, nbanks;
70040732134Srjs
70143df4709Smrg#if defined(XF86DRM_MODE)
70240732134Srjs    if (tex_res->surface) {
70340732134Srjs	switch (tex_res->surface->level[0].mode) {
70440732134Srjs	case RADEON_SURF_MODE_1D:
70540732134Srjs		array_mode = 2;
70640732134Srjs		break;
70740732134Srjs	case RADEON_SURF_MODE_2D:
70840732134Srjs		array_mode = 4;
70940732134Srjs		break;
71040732134Srjs	default:
71140732134Srjs		array_mode = 0;
71240732134Srjs		break;
71340732134Srjs	}
71440732134Srjs	pitch = tex_res->surface->level[0].nblk_x >> 3;
71540732134Srjs	tile_split = tex_res->surface->tile_split;
71640732134Srjs	macro_aspect = tex_res->surface->mtilea;
71740732134Srjs	bankw = tex_res->surface->bankw;
71840732134Srjs	bankh = tex_res->surface->bankh;
71940732134Srjs	tile_split = eg_tile_split(tile_split);
72040732134Srjs	macro_aspect = eg_macro_tile_aspect(macro_aspect);
72140732134Srjs	bankw = eg_bank_wh(bankw);
72240732134Srjs	bankh = eg_bank_wh(bankh);
72343df4709Smrg    } else
72443df4709Smrg#endif
72543df4709Smrg    {
72640732134Srjs	array_mode = tex_res->array_mode;
72740732134Srjs	pitch = (tex_res->pitch + 7) >> 3;
72840732134Srjs	tile_split = 4;
72940732134Srjs	macro_aspect = 0;
73040732134Srjs	bankw = 0;
73140732134Srjs	bankh = 0;
73240732134Srjs    }
73340732134Srjs    nbanks = info->num_banks;
73440732134Srjs    nbanks = eg_nbanks(nbanks);
735921a55d8Smrg
736921a55d8Smrg    sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
737921a55d8Smrg
738921a55d8Smrg    if (tex_res->w)
73940732134Srjs	sq_tex_resource_word0 |= ( ((pitch - 1) << PITCH_shift) |
74040732134Srjs				   ((tex_res->w - 1) << TEX_WIDTH_shift) );
741921a55d8Smrg
742921a55d8Smrg    if (tex_res->tile_type)
743921a55d8Smrg	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
744921a55d8Smrg
74540732134Srjs    sq_tex_resource_word1 = (array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
746921a55d8Smrg
747921a55d8Smrg    if (tex_res->h)
748921a55d8Smrg	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
749921a55d8Smrg    if (tex_res->depth)
750921a55d8Smrg	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
751921a55d8Smrg
752921a55d8Smrg    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
753921a55d8Smrg			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
754921a55d8Smrg			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
755921a55d8Smrg			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
756921a55d8Smrg			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
757921a55d8Smrg			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
758921a55d8Smrg			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
759921a55d8Smrg			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
760921a55d8Smrg			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
761921a55d8Smrg			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
762921a55d8Smrg			     (tex_res->base_level << BASE_LEVEL_shift));
763921a55d8Smrg
764921a55d8Smrg    if (tex_res->srf_mode_all)
765921a55d8Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
766921a55d8Smrg    if (tex_res->force_degamma)
767921a55d8Smrg	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
768921a55d8Smrg
769921a55d8Smrg    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
770921a55d8Smrg			     (tex_res->base_array << BASE_ARRAY_shift) |
771921a55d8Smrg			     (tex_res->last_array << LAST_ARRAY_shift));
772921a55d8Smrg
773921a55d8Smrg    sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
77440732134Srjs			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
77540732134Srjs			     (tile_split << SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift));
776921a55d8Smrg
777921a55d8Smrg    if (tex_res->interlaced)
778921a55d8Smrg	sq_tex_resource_word6 |= INTERLACED_bit;
779921a55d8Smrg
780921a55d8Smrg    sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
78140732134Srjs			     (macro_aspect << SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift) |
78240732134Srjs			     (nbanks << SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift) |
78340732134Srjs			     (bankw << SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift) |
78440732134Srjs			     (bankh << SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift) |
785921a55d8Smrg			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
786921a55d8Smrg
787921a55d8Smrg    /* flush texture cache */
788921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
789921a55d8Smrg				  tex_res->size, tex_res->base,
790921a55d8Smrg				  tex_res->bo, domain, 0);
791921a55d8Smrg
792921a55d8Smrg    BEGIN_BATCH(10 + 4);
793921a55d8Smrg    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
794921a55d8Smrg    E32(sq_tex_resource_word0);
795921a55d8Smrg    E32(sq_tex_resource_word1);
796921a55d8Smrg    E32(((tex_res->base) >> 8));
797921a55d8Smrg    E32(((tex_res->mip_base) >> 8));
798921a55d8Smrg    E32(sq_tex_resource_word4);
799921a55d8Smrg    E32(sq_tex_resource_word5);
800921a55d8Smrg    E32(sq_tex_resource_word6);
801921a55d8Smrg    E32(sq_tex_resource_word7);
802921a55d8Smrg    RELOC_BATCH(tex_res->bo, domain, 0);
803921a55d8Smrg    RELOC_BATCH(tex_res->mip_bo, domain, 0);
804921a55d8Smrg    END_BATCH();
805921a55d8Smrg}
806921a55d8Smrg
807c4ae5be6Smrg/* cayman has some minor differences in SQ_TEX_SAMPLER_WORD0_0,
808c4ae5be6Smrg * but none that we use here.
809c4ae5be6Smrg */
810921a55d8Smrgvoid
811921a55d8Smrgevergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
812921a55d8Smrg{
813921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
814921a55d8Smrg    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
815921a55d8Smrg
816921a55d8Smrg    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
817921a55d8Smrg			    (s->clamp_y       << CLAMP_Y_shift)					|
818921a55d8Smrg			    (s->clamp_z       << CLAMP_Z_shift)					|
819921a55d8Smrg			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
820921a55d8Smrg			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
821921a55d8Smrg			    (s->z_filter      << Z_FILTER_shift)	|
822921a55d8Smrg			    (s->mip_filter    << MIP_FILTER_shift)				|
823921a55d8Smrg			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
824921a55d8Smrg			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
825921a55d8Smrg			    (s->chroma_key    << CHROMA_KEY_shift));
826921a55d8Smrg
827921a55d8Smrg    sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
828921a55d8Smrg			    (s->max_lod       << MAX_LOD_shift)					|
829921a55d8Smrg			    (s->perf_mip      << PERF_MIP_shift)	|
830921a55d8Smrg			    (s->perf_z        << PERF_Z_shift));
831921a55d8Smrg
832921a55d8Smrg
833921a55d8Smrg    sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
834921a55d8Smrg			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
835921a55d8Smrg
836921a55d8Smrg    if (s->mc_coord_truncate)
837921a55d8Smrg	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
838921a55d8Smrg    if (s->force_degamma)
839921a55d8Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
840921a55d8Smrg    if (s->truncate_coord)
841921a55d8Smrg	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
842921a55d8Smrg    if (s->disable_cube_wrap)
843921a55d8Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
844921a55d8Smrg    if (s->type)
845921a55d8Smrg	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
846921a55d8Smrg
847921a55d8Smrg    BEGIN_BATCH(5);
848921a55d8Smrg    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
849921a55d8Smrg    E32(sq_tex_sampler_word0);
850921a55d8Smrg    E32(sq_tex_sampler_word1);
851921a55d8Smrg    E32(sq_tex_sampler_word2);
852921a55d8Smrg    END_BATCH();
853921a55d8Smrg}
854921a55d8Smrg
855c4ae5be6Smrg/* workarounds for hw bugs in eg+ */
856c4ae5be6Smrg/* only affects screen/window/generic/vport.  cliprects are not affected */
857c4ae5be6Smrgstatic void
858c4ae5be6Smrgevergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn, int *x1, int *y1, int *x2, int *y2)
859c4ae5be6Smrg{
860c4ae5be6Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
861c4ae5be6Smrg
862c4ae5be6Smrg    /* all eg+ asics */
863c4ae5be6Smrg    if (*x2 == 0)
864c4ae5be6Smrg	*x1 = 1;
865c4ae5be6Smrg    if (*y2 == 0)
866c4ae5be6Smrg	*y1 = 1;
867c4ae5be6Smrg
86840732134Srjs    /* cayman/tn only */
86940732134Srjs    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
870c4ae5be6Smrg	/* cliprects aren't affected so we can use them to clip if we need
871c4ae5be6Smrg	 * a true 1x1 clip region
872c4ae5be6Smrg	 */
873c4ae5be6Smrg	if ((*x2 == 1) && (*y2 == 1))
874c4ae5be6Smrg	    *x2 = 2;
875c4ae5be6Smrg    }
876c4ae5be6Smrg}
877c4ae5be6Smrg
878921a55d8Smrg//XXX deal with clip offsets in clip setup
879921a55d8Smrgvoid
880921a55d8Smrgevergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
881921a55d8Smrg{
882921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
883921a55d8Smrg
884c4ae5be6Smrg    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
885c4ae5be6Smrg
886921a55d8Smrg    BEGIN_BATCH(4);
887921a55d8Smrg    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
888921a55d8Smrg    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
889921a55d8Smrg	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
890921a55d8Smrg    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
891921a55d8Smrg	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
892921a55d8Smrg    END_BATCH();
893921a55d8Smrg}
894921a55d8Smrg
895921a55d8Smrgvoid
896921a55d8Smrgevergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
897921a55d8Smrg{
898921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
899921a55d8Smrg
900c4ae5be6Smrg    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
901c4ae5be6Smrg
902921a55d8Smrg    BEGIN_BATCH(4);
903921a55d8Smrg    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
904921a55d8Smrg    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
905921a55d8Smrg	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
906921a55d8Smrg	 WINDOW_OFFSET_DISABLE_bit));
907921a55d8Smrg    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
908921a55d8Smrg	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
909921a55d8Smrg    END_BATCH();
910921a55d8Smrg}
911921a55d8Smrg
912921a55d8Smrgvoid
913921a55d8Smrgevergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
914921a55d8Smrg{
915921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
916921a55d8Smrg
917c4ae5be6Smrg    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
918c4ae5be6Smrg
919921a55d8Smrg    BEGIN_BATCH(4);
920921a55d8Smrg    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
921921a55d8Smrg    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
922921a55d8Smrg	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
923921a55d8Smrg	 WINDOW_OFFSET_DISABLE_bit));
924921a55d8Smrg    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
925921a55d8Smrg	 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
926921a55d8Smrg    END_BATCH();
927921a55d8Smrg}
928921a55d8Smrg
929921a55d8Smrgvoid
930921a55d8Smrgevergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
931921a55d8Smrg{
932921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
933921a55d8Smrg
934c4ae5be6Smrg    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
935c4ae5be6Smrg
936921a55d8Smrg    BEGIN_BATCH(4);
937921a55d8Smrg    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
938921a55d8Smrg    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
939921a55d8Smrg	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
940921a55d8Smrg	 WINDOW_OFFSET_DISABLE_bit));
941921a55d8Smrg    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
942921a55d8Smrg	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
943921a55d8Smrg    END_BATCH();
944921a55d8Smrg}
945921a55d8Smrg
946921a55d8Smrgvoid
947921a55d8Smrgevergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
948921a55d8Smrg{
949921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
950921a55d8Smrg
951921a55d8Smrg    BEGIN_BATCH(4);
952921a55d8Smrg    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
953921a55d8Smrg    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
954921a55d8Smrg	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
955921a55d8Smrg    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
956921a55d8Smrg	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
957921a55d8Smrg    END_BATCH();
958921a55d8Smrg}
959921a55d8Smrg
960921a55d8Smrg/*
961921a55d8Smrg * Setup of default state
962921a55d8Smrg */
963921a55d8Smrg
964921a55d8Smrgvoid
965921a55d8Smrgevergreen_set_default_state(ScrnInfoPtr pScrn)
966921a55d8Smrg{
967921a55d8Smrg    tex_resource_t tex_res;
968921a55d8Smrg    shader_config_t fs_conf;
969921a55d8Smrg    sq_config_t sq_conf;
970921a55d8Smrg    int i;
971921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
972921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
973921a55d8Smrg
97440732134Srjs    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
975c4ae5be6Smrg	cayman_set_default_state(pScrn);
976c4ae5be6Smrg	return;
977c4ae5be6Smrg    }
978c4ae5be6Smrg
979921a55d8Smrg    if (accel_state->XInited3D)
980921a55d8Smrg	return;
981921a55d8Smrg
982921a55d8Smrg    memset(&tex_res, 0, sizeof(tex_resource_t));
983921a55d8Smrg    memset(&fs_conf, 0, sizeof(shader_config_t));
984921a55d8Smrg
985921a55d8Smrg    accel_state->XInited3D = TRUE;
986921a55d8Smrg
987921a55d8Smrg    evergreen_start_3d(pScrn);
988921a55d8Smrg
989921a55d8Smrg    /* SQ */
990921a55d8Smrg    sq_conf.ps_prio = 0;
991921a55d8Smrg    sq_conf.vs_prio = 1;
992921a55d8Smrg    sq_conf.gs_prio = 2;
993921a55d8Smrg    sq_conf.es_prio = 3;
994921a55d8Smrg    sq_conf.hs_prio = 0;
995921a55d8Smrg    sq_conf.ls_prio = 0;
996921a55d8Smrg    sq_conf.cs_prio = 0;
997921a55d8Smrg
998921a55d8Smrg    switch (info->ChipFamily) {
999921a55d8Smrg    case CHIP_FAMILY_CEDAR:
1000921a55d8Smrg    default:
1001921a55d8Smrg	sq_conf.num_ps_gprs = 93;
1002921a55d8Smrg	sq_conf.num_vs_gprs = 46;
1003921a55d8Smrg	sq_conf.num_temp_gprs = 4;
1004921a55d8Smrg	sq_conf.num_gs_gprs = 31;
1005921a55d8Smrg	sq_conf.num_es_gprs = 31;
1006921a55d8Smrg	sq_conf.num_hs_gprs = 23;
1007921a55d8Smrg	sq_conf.num_ls_gprs = 23;
1008921a55d8Smrg	sq_conf.num_ps_threads = 96;
1009921a55d8Smrg	sq_conf.num_vs_threads = 16;
1010921a55d8Smrg	sq_conf.num_gs_threads = 16;
1011921a55d8Smrg	sq_conf.num_es_threads = 16;
1012921a55d8Smrg	sq_conf.num_hs_threads = 16;
1013921a55d8Smrg	sq_conf.num_ls_threads = 16;
1014921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
1015921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
1016921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
1017921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
1018921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
1019921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
1020921a55d8Smrg	break;
1021921a55d8Smrg    case CHIP_FAMILY_REDWOOD:
1022921a55d8Smrg	sq_conf.num_ps_gprs = 93;
1023921a55d8Smrg	sq_conf.num_vs_gprs = 46;
1024921a55d8Smrg	sq_conf.num_temp_gprs = 4;
1025921a55d8Smrg	sq_conf.num_gs_gprs = 31;
1026921a55d8Smrg	sq_conf.num_es_gprs = 31;
1027921a55d8Smrg	sq_conf.num_hs_gprs = 23;
1028921a55d8Smrg	sq_conf.num_ls_gprs = 23;
1029921a55d8Smrg	sq_conf.num_ps_threads = 128;
1030921a55d8Smrg	sq_conf.num_vs_threads = 20;
1031921a55d8Smrg	sq_conf.num_gs_threads = 20;
1032921a55d8Smrg	sq_conf.num_es_threads = 20;
1033921a55d8Smrg	sq_conf.num_hs_threads = 20;
1034921a55d8Smrg	sq_conf.num_ls_threads = 20;
1035921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
1036921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
1037921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
1038921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
1039921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
1040921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
1041921a55d8Smrg	break;
1042921a55d8Smrg    case CHIP_FAMILY_JUNIPER:
1043921a55d8Smrg	sq_conf.num_ps_gprs = 93;
1044921a55d8Smrg	sq_conf.num_vs_gprs = 46;
1045921a55d8Smrg	sq_conf.num_temp_gprs = 4;
1046921a55d8Smrg	sq_conf.num_gs_gprs = 31;
1047921a55d8Smrg	sq_conf.num_es_gprs = 31;
1048921a55d8Smrg	sq_conf.num_hs_gprs = 23;
1049921a55d8Smrg	sq_conf.num_ls_gprs = 23;
1050921a55d8Smrg	sq_conf.num_ps_threads = 128;
1051921a55d8Smrg	sq_conf.num_vs_threads = 20;
1052921a55d8Smrg	sq_conf.num_gs_threads = 20;
1053921a55d8Smrg	sq_conf.num_es_threads = 20;
1054921a55d8Smrg	sq_conf.num_hs_threads = 20;
1055921a55d8Smrg	sq_conf.num_ls_threads = 20;
1056921a55d8Smrg	sq_conf.num_ps_stack_entries = 85;
1057921a55d8Smrg	sq_conf.num_vs_stack_entries = 85;
1058921a55d8Smrg	sq_conf.num_gs_stack_entries = 85;
1059921a55d8Smrg	sq_conf.num_es_stack_entries = 85;
1060921a55d8Smrg	sq_conf.num_hs_stack_entries = 85;
1061921a55d8Smrg	sq_conf.num_ls_stack_entries = 85;
1062921a55d8Smrg	break;
1063921a55d8Smrg    case CHIP_FAMILY_CYPRESS:
1064921a55d8Smrg    case CHIP_FAMILY_HEMLOCK:
1065921a55d8Smrg	sq_conf.num_ps_gprs = 93;
1066921a55d8Smrg	sq_conf.num_vs_gprs = 46;
1067921a55d8Smrg	sq_conf.num_temp_gprs = 4;
1068921a55d8Smrg	sq_conf.num_gs_gprs = 31;
1069921a55d8Smrg	sq_conf.num_es_gprs = 31;
1070921a55d8Smrg	sq_conf.num_hs_gprs = 23;
1071921a55d8Smrg	sq_conf.num_ls_gprs = 23;
1072921a55d8Smrg	sq_conf.num_ps_threads = 128;
1073921a55d8Smrg	sq_conf.num_vs_threads = 20;
1074921a55d8Smrg	sq_conf.num_gs_threads = 20;
1075921a55d8Smrg	sq_conf.num_es_threads = 20;
1076921a55d8Smrg	sq_conf.num_hs_threads = 20;
1077921a55d8Smrg	sq_conf.num_ls_threads = 20;
1078921a55d8Smrg	sq_conf.num_ps_stack_entries = 85;
1079921a55d8Smrg	sq_conf.num_vs_stack_entries = 85;
1080921a55d8Smrg	sq_conf.num_gs_stack_entries = 85;
1081921a55d8Smrg	sq_conf.num_es_stack_entries = 85;
1082921a55d8Smrg	sq_conf.num_hs_stack_entries = 85;
1083921a55d8Smrg	sq_conf.num_ls_stack_entries = 85;
1084921a55d8Smrg	break;
1085921a55d8Smrg    case CHIP_FAMILY_PALM:
1086921a55d8Smrg	sq_conf.num_ps_gprs = 93;
1087921a55d8Smrg	sq_conf.num_vs_gprs = 46;
1088921a55d8Smrg	sq_conf.num_temp_gprs = 4;
1089921a55d8Smrg	sq_conf.num_gs_gprs = 31;
1090921a55d8Smrg	sq_conf.num_es_gprs = 31;
1091921a55d8Smrg	sq_conf.num_hs_gprs = 23;
1092921a55d8Smrg	sq_conf.num_ls_gprs = 23;
1093921a55d8Smrg	sq_conf.num_ps_threads = 96;
1094921a55d8Smrg	sq_conf.num_vs_threads = 16;
1095921a55d8Smrg	sq_conf.num_gs_threads = 16;
1096921a55d8Smrg	sq_conf.num_es_threads = 16;
1097921a55d8Smrg	sq_conf.num_hs_threads = 16;
1098921a55d8Smrg	sq_conf.num_ls_threads = 16;
1099921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
1100921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
1101921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
1102921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
1103921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
1104921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
1105921a55d8Smrg	break;
110640732134Srjs    case CHIP_FAMILY_SUMO:
110740732134Srjs	sq_conf.num_ps_gprs = 93;
110840732134Srjs	sq_conf.num_vs_gprs = 46;
110940732134Srjs	sq_conf.num_temp_gprs = 4;
111040732134Srjs	sq_conf.num_gs_gprs = 31;
111140732134Srjs	sq_conf.num_es_gprs = 31;
111240732134Srjs	sq_conf.num_hs_gprs = 23;
111340732134Srjs	sq_conf.num_ls_gprs = 23;
111440732134Srjs	sq_conf.num_ps_threads = 96;
111540732134Srjs	sq_conf.num_vs_threads = 25;
111640732134Srjs	sq_conf.num_gs_threads = 25;
111740732134Srjs	sq_conf.num_es_threads = 25;
111840732134Srjs	sq_conf.num_hs_threads = 25;
111940732134Srjs	sq_conf.num_ls_threads = 25;
112040732134Srjs	sq_conf.num_ps_stack_entries = 42;
112140732134Srjs	sq_conf.num_vs_stack_entries = 42;
112240732134Srjs	sq_conf.num_gs_stack_entries = 42;
112340732134Srjs	sq_conf.num_es_stack_entries = 42;
112440732134Srjs	sq_conf.num_hs_stack_entries = 42;
112540732134Srjs	sq_conf.num_ls_stack_entries = 42;
112640732134Srjs	break;
112740732134Srjs    case CHIP_FAMILY_SUMO2:
112840732134Srjs	sq_conf.num_ps_gprs = 93;
112940732134Srjs	sq_conf.num_vs_gprs = 46;
113040732134Srjs	sq_conf.num_temp_gprs = 4;
113140732134Srjs	sq_conf.num_gs_gprs = 31;
113240732134Srjs	sq_conf.num_es_gprs = 31;
113340732134Srjs	sq_conf.num_hs_gprs = 23;
113440732134Srjs	sq_conf.num_ls_gprs = 23;
113540732134Srjs	sq_conf.num_ps_threads = 96;
113640732134Srjs	sq_conf.num_vs_threads = 25;
113740732134Srjs	sq_conf.num_gs_threads = 25;
113840732134Srjs	sq_conf.num_es_threads = 25;
113940732134Srjs	sq_conf.num_hs_threads = 25;
114040732134Srjs	sq_conf.num_ls_threads = 25;
114140732134Srjs	sq_conf.num_ps_stack_entries = 85;
114240732134Srjs	sq_conf.num_vs_stack_entries = 85;
114340732134Srjs	sq_conf.num_gs_stack_entries = 85;
114440732134Srjs	sq_conf.num_es_stack_entries = 85;
114540732134Srjs	sq_conf.num_hs_stack_entries = 85;
114640732134Srjs	sq_conf.num_ls_stack_entries = 85;
114740732134Srjs	break;
1148921a55d8Smrg    case CHIP_FAMILY_BARTS:
1149921a55d8Smrg	sq_conf.num_ps_gprs = 93;
1150921a55d8Smrg	sq_conf.num_vs_gprs = 46;
1151921a55d8Smrg	sq_conf.num_temp_gprs = 4;
1152921a55d8Smrg	sq_conf.num_gs_gprs = 31;
1153921a55d8Smrg	sq_conf.num_es_gprs = 31;
1154921a55d8Smrg	sq_conf.num_hs_gprs = 23;
1155921a55d8Smrg	sq_conf.num_ls_gprs = 23;
1156921a55d8Smrg	sq_conf.num_ps_threads = 128;
1157921a55d8Smrg	sq_conf.num_vs_threads = 20;
1158921a55d8Smrg	sq_conf.num_gs_threads = 20;
1159921a55d8Smrg	sq_conf.num_es_threads = 20;
1160921a55d8Smrg	sq_conf.num_hs_threads = 20;
1161921a55d8Smrg	sq_conf.num_ls_threads = 20;
1162921a55d8Smrg	sq_conf.num_ps_stack_entries = 85;
1163921a55d8Smrg	sq_conf.num_vs_stack_entries = 85;
1164921a55d8Smrg	sq_conf.num_gs_stack_entries = 85;
1165921a55d8Smrg	sq_conf.num_es_stack_entries = 85;
1166921a55d8Smrg	sq_conf.num_hs_stack_entries = 85;
1167921a55d8Smrg	sq_conf.num_ls_stack_entries = 85;
1168921a55d8Smrg	break;
1169921a55d8Smrg    case CHIP_FAMILY_TURKS:
1170921a55d8Smrg	sq_conf.num_ps_gprs = 93;
1171921a55d8Smrg	sq_conf.num_vs_gprs = 46;
1172921a55d8Smrg	sq_conf.num_temp_gprs = 4;
1173921a55d8Smrg	sq_conf.num_gs_gprs = 31;
1174921a55d8Smrg	sq_conf.num_es_gprs = 31;
1175921a55d8Smrg	sq_conf.num_hs_gprs = 23;
1176921a55d8Smrg	sq_conf.num_ls_gprs = 23;
1177921a55d8Smrg	sq_conf.num_ps_threads = 128;
1178921a55d8Smrg	sq_conf.num_vs_threads = 20;
1179921a55d8Smrg	sq_conf.num_gs_threads = 20;
1180921a55d8Smrg	sq_conf.num_es_threads = 20;
1181921a55d8Smrg	sq_conf.num_hs_threads = 20;
1182921a55d8Smrg	sq_conf.num_ls_threads = 20;
1183921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
1184921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
1185921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
1186921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
1187921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
1188921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
1189921a55d8Smrg	break;
1190921a55d8Smrg    case CHIP_FAMILY_CAICOS:
1191921a55d8Smrg	sq_conf.num_ps_gprs = 93;
1192921a55d8Smrg	sq_conf.num_vs_gprs = 46;
1193921a55d8Smrg	sq_conf.num_temp_gprs = 4;
1194921a55d8Smrg	sq_conf.num_gs_gprs = 31;
1195921a55d8Smrg	sq_conf.num_es_gprs = 31;
1196921a55d8Smrg	sq_conf.num_hs_gprs = 23;
1197921a55d8Smrg	sq_conf.num_ls_gprs = 23;
1198921a55d8Smrg	sq_conf.num_ps_threads = 128;
1199921a55d8Smrg	sq_conf.num_vs_threads = 10;
1200921a55d8Smrg	sq_conf.num_gs_threads = 10;
1201921a55d8Smrg	sq_conf.num_es_threads = 10;
1202921a55d8Smrg	sq_conf.num_hs_threads = 10;
1203921a55d8Smrg	sq_conf.num_ls_threads = 10;
1204921a55d8Smrg	sq_conf.num_ps_stack_entries = 42;
1205921a55d8Smrg	sq_conf.num_vs_stack_entries = 42;
1206921a55d8Smrg	sq_conf.num_gs_stack_entries = 42;
1207921a55d8Smrg	sq_conf.num_es_stack_entries = 42;
1208921a55d8Smrg	sq_conf.num_hs_stack_entries = 42;
1209921a55d8Smrg	sq_conf.num_ls_stack_entries = 42;
1210921a55d8Smrg	break;
1211921a55d8Smrg    }
1212921a55d8Smrg
1213921a55d8Smrg    evergreen_sq_setup(pScrn, &sq_conf);
1214921a55d8Smrg
121540732134Srjs    BEGIN_BATCH(27);
1216921a55d8Smrg    EREG(SQ_LDS_ALLOC_PS, 0);
121740732134Srjs    EREG(SQ_LDS_RESOURCE_MGMT, 0x10001000);
1218921a55d8Smrg    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
1219921a55d8Smrg
1220921a55d8Smrg    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
1221921a55d8Smrg    E32(0);
1222921a55d8Smrg    E32(0);
1223921a55d8Smrg    E32(0);
1224921a55d8Smrg    E32(0);
1225921a55d8Smrg    E32(0);
1226921a55d8Smrg    E32(0);
1227921a55d8Smrg
1228921a55d8Smrg    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
1229921a55d8Smrg    E32(0);
1230921a55d8Smrg    E32(0);
1231921a55d8Smrg    E32(0);
1232921a55d8Smrg    E32(0);
1233921a55d8Smrg
1234921a55d8Smrg    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
1235921a55d8Smrg    E32(0);
1236921a55d8Smrg    E32(0);
1237921a55d8Smrg    END_BATCH();
1238921a55d8Smrg
1239921a55d8Smrg    /* DB */
1240921a55d8Smrg    BEGIN_BATCH(3 + 2);
1241921a55d8Smrg    EREG(DB_Z_INFO,                           0);
1242921a55d8Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1243921a55d8Smrg    END_BATCH();
1244921a55d8Smrg
1245921a55d8Smrg    BEGIN_BATCH(3 + 2);
1246921a55d8Smrg    EREG(DB_STENCIL_INFO,                     0);
1247921a55d8Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1248921a55d8Smrg    END_BATCH();
1249921a55d8Smrg
1250921a55d8Smrg    BEGIN_BATCH(3 + 2);
1251921a55d8Smrg    EREG(DB_HTILE_DATA_BASE,                    0);
1252921a55d8Smrg    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1253921a55d8Smrg    END_BATCH();
1254921a55d8Smrg
1255921a55d8Smrg    BEGIN_BATCH(49);
1256921a55d8Smrg    EREG(DB_DEPTH_CONTROL,                    0);
1257921a55d8Smrg
1258921a55d8Smrg    PACK0(PA_SC_VPORT_ZMIN_0, 2);
1259921a55d8Smrg    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
1260921a55d8Smrg    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
1261921a55d8Smrg
1262921a55d8Smrg    PACK0(DB_RENDER_CONTROL, 5);
1263921a55d8Smrg    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
1264921a55d8Smrg    E32(0); // DB_COUNT_CONTROL
1265921a55d8Smrg    E32(0); // DB_DEPTH_VIEW
1266921a55d8Smrg    E32(0x2a); // DB_RENDER_OVERRIDE
1267921a55d8Smrg    E32(0); // DB_RENDER_OVERRIDE2
1268921a55d8Smrg
1269921a55d8Smrg    PACK0(DB_STENCIL_CLEAR, 2);
1270921a55d8Smrg    E32(0); // DB_STENCIL_CLEAR
1271921a55d8Smrg    E32(0); // DB_DEPTH_CLEAR
1272921a55d8Smrg
1273921a55d8Smrg    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
1274921a55d8Smrg					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
1275921a55d8Smrg					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
1276921a55d8Smrg					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
1277921a55d8Smrg
1278921a55d8Smrg    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
1279921a55d8Smrg			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1280921a55d8Smrg
1281921a55d8Smrg    // SX
1282921a55d8Smrg    EREG(SX_MISC,               0);
1283921a55d8Smrg
1284921a55d8Smrg    // CB
1285921a55d8Smrg    PACK0(SX_ALPHA_TEST_CONTROL, 5);
1286921a55d8Smrg    E32(0); // SX_ALPHA_TEST_CONTROL
1287921a55d8Smrg    E32(0x00000000); //CB_BLEND_RED
1288921a55d8Smrg    E32(0x00000000); //CB_BLEND_GREEN
1289921a55d8Smrg    E32(0x00000000); //CB_BLEND_BLUE
1290921a55d8Smrg    E32(0x00000000); //CB_BLEND_ALPHA
1291921a55d8Smrg
1292921a55d8Smrg    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
1293921a55d8Smrg
1294921a55d8Smrg    // SC
1295921a55d8Smrg    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1296921a55d8Smrg					       (0 << WINDOW_Y_OFFSET_shift)));
1297921a55d8Smrg    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1298921a55d8Smrg    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
1299921a55d8Smrg    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1300921a55d8Smrg    END_BATCH();
1301921a55d8Smrg
1302921a55d8Smrg    /* clip boolean is set to always visible -> doesn't matter */
1303921a55d8Smrg    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1304921a55d8Smrg	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1305921a55d8Smrg
1306921a55d8Smrg    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1307921a55d8Smrg	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1308921a55d8Smrg
1309b13dfe66Smrg    BEGIN_BATCH(57);
1310921a55d8Smrg    PACK0(PA_SC_MODE_CNTL_0, 2);
1311921a55d8Smrg    E32(0); // PA_SC_MODE_CNTL_0
1312921a55d8Smrg    E32(0); // PA_SC_MODE_CNTL_1
1313921a55d8Smrg
1314921a55d8Smrg    PACK0(PA_SC_LINE_CNTL, 16);
1315921a55d8Smrg    E32(0); // PA_SC_LINE_CNTL
1316921a55d8Smrg    E32(0); // PA_SC_AA_CONFIG
1317921a55d8Smrg    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1318921a55d8Smrg	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1319921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1320921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1321921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1322921a55d8Smrg    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1323921a55d8Smrg    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1324921a55d8Smrg    E32(0);
1325921a55d8Smrg    E32(0);
1326921a55d8Smrg    E32(0);
1327921a55d8Smrg    E32(0);
1328921a55d8Smrg    E32(0);
1329921a55d8Smrg    E32(0);
1330921a55d8Smrg    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1331921a55d8Smrg    E32(0xFFFFFFFF); // PA_SC_AA_MASK
1332921a55d8Smrg
1333921a55d8Smrg    // CL
1334921a55d8Smrg    PACK0(PA_CL_CLIP_CNTL, 8);
1335921a55d8Smrg    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1336921a55d8Smrg    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1337921a55d8Smrg    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1338921a55d8Smrg    E32(0); // PA_CL_VS_OUT_CNTL
1339921a55d8Smrg    E32(0); // PA_CL_NANINF_CNTL
1340921a55d8Smrg    E32(0); // PA_SU_LINE_STIPPLE_CNTL
1341921a55d8Smrg    E32(0); // PA_SU_LINE_STIPPLE_SCALE
1342921a55d8Smrg    E32(0); // PA_SU_PRIM_FILTER_CNTL
1343921a55d8Smrg
1344921a55d8Smrg    // SU
1345921a55d8Smrg    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1346921a55d8Smrg    E32(0);
1347921a55d8Smrg    E32(0);
1348921a55d8Smrg    E32(0);
1349921a55d8Smrg    E32(0);
1350921a55d8Smrg    E32(0);
1351921a55d8Smrg    E32(0);
1352921a55d8Smrg
1353b13dfe66Smrg    /* src = semantic id 0; mask = semantic id 1 */
1354b13dfe66Smrg    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1355b13dfe66Smrg			   (1 << SEMANTIC_1_shift)));
1356b13dfe66Smrg    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1357b13dfe66Smrg    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1358b13dfe66Smrg    E32(((0    << SEMANTIC_shift)	|
1359b13dfe66Smrg	 (0x01 << DEFAULT_VAL_shift)));
1360b13dfe66Smrg    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1361b13dfe66Smrg    E32(((1    << SEMANTIC_shift)	|
1362b13dfe66Smrg	 (0x01 << DEFAULT_VAL_shift)));
1363b13dfe66Smrg
1364921a55d8Smrg    PACK0(SPI_INPUT_Z, 8);
1365921a55d8Smrg    E32(0); // SPI_INPUT_Z
1366921a55d8Smrg    E32(0); // SPI_FOG_CNTL
1367921a55d8Smrg    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1368921a55d8Smrg    E32(0); // SPI_PS_IN_CONTROL_2
1369921a55d8Smrg    E32(0);
1370921a55d8Smrg    E32(0);
1371921a55d8Smrg    E32(0);
1372921a55d8Smrg    E32(0);
1373921a55d8Smrg    END_BATCH();
1374921a55d8Smrg
1375921a55d8Smrg    // clear FS
1376921a55d8Smrg    fs_conf.bo = accel_state->shaders_bo;
1377921a55d8Smrg    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1378921a55d8Smrg
1379921a55d8Smrg    // VGT
1380921a55d8Smrg    BEGIN_BATCH(46);
1381921a55d8Smrg
1382921a55d8Smrg    PACK0(VGT_MAX_VTX_INDX, 4);
1383921a55d8Smrg    E32(0xffffff);
1384921a55d8Smrg    E32(0);
1385921a55d8Smrg    E32(0);
1386921a55d8Smrg    E32(0);
1387921a55d8Smrg
1388921a55d8Smrg    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1389921a55d8Smrg    E32(0);
1390921a55d8Smrg    E32(0);
1391921a55d8Smrg
1392921a55d8Smrg    PACK0(VGT_REUSE_OFF, 2);
1393921a55d8Smrg    E32(0);
1394921a55d8Smrg    E32(0);
1395921a55d8Smrg
1396921a55d8Smrg    PACK0(PA_SU_POINT_SIZE, 17);
1397921a55d8Smrg    E32(0); // PA_SU_POINT_SIZE
1398921a55d8Smrg    E32(0); // PA_SU_POINT_MINMAX
1399921a55d8Smrg    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1400921a55d8Smrg    E32(0); // PA_SC_LINE_STIPPLE
1401921a55d8Smrg    E32(0); // VGT_OUTPUT_PATH_CNTL
1402921a55d8Smrg    E32(0); // VGT_HOS_CNTL
1403921a55d8Smrg    E32(0);
1404921a55d8Smrg    E32(0);
1405921a55d8Smrg    E32(0);
1406921a55d8Smrg    E32(0);
1407921a55d8Smrg    E32(0);
1408921a55d8Smrg    E32(0);
1409921a55d8Smrg    E32(0);
1410921a55d8Smrg    E32(0);
1411921a55d8Smrg    E32(0);
1412921a55d8Smrg    E32(0);
1413921a55d8Smrg    E32(0); // VGT_GS_MODE
1414921a55d8Smrg
1415921a55d8Smrg    EREG(VGT_PRIMITIVEID_EN,                  0);
1416921a55d8Smrg    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1417921a55d8Smrg    EREG(VGT_SHADER_STAGES_EN,          0);
1418921a55d8Smrg
1419921a55d8Smrg    PACK0(VGT_STRMOUT_CONFIG, 2);
1420921a55d8Smrg    E32(0);
1421921a55d8Smrg    E32(0);
1422921a55d8Smrg    END_BATCH();
1423921a55d8Smrg}
1424921a55d8Smrg
1425921a55d8Smrg
1426921a55d8Smrg/*
1427921a55d8Smrg * Commands
1428921a55d8Smrg */
1429921a55d8Smrg
1430921a55d8Smrgvoid
1431921a55d8Smrgevergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1432921a55d8Smrg{
1433921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1434921a55d8Smrg
1435921a55d8Smrg    BEGIN_BATCH(10);
1436921a55d8Smrg    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1437921a55d8Smrg    PACK3(IT_INDEX_TYPE, 1);
1438b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1439b13dfe66Smrg    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1440b13dfe66Smrg#else
1441921a55d8Smrg    E32(draw_conf->index_type);
1442b13dfe66Smrg#endif
1443921a55d8Smrg    PACK3(IT_NUM_INSTANCES, 1);
1444921a55d8Smrg    E32(draw_conf->num_instances);
1445921a55d8Smrg    PACK3(IT_DRAW_INDEX_AUTO, 2);
1446921a55d8Smrg    E32(draw_conf->num_indices);
1447921a55d8Smrg    E32(draw_conf->vgt_draw_initiator);
1448921a55d8Smrg    END_BATCH();
1449921a55d8Smrg}
1450921a55d8Smrg
1451921a55d8Smrgvoid evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1452921a55d8Smrg{
1453921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1454921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1455921a55d8Smrg    draw_config_t   draw_conf;
1456921a55d8Smrg    vtx_resource_t  vtx_res;
1457921a55d8Smrg
1458921a55d8Smrg    if (accel_state->vbo.vb_start_op == -1)
1459921a55d8Smrg      return;
1460921a55d8Smrg
1461921a55d8Smrg    CLEAR (draw_conf);
1462921a55d8Smrg    CLEAR (vtx_res);
1463921a55d8Smrg
1464921a55d8Smrg    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1465921a55d8Smrg	radeon_ib_discard(pScrn);
1466921a55d8Smrg	radeon_cs_flush_indirect(pScrn);
1467921a55d8Smrg	return;
1468921a55d8Smrg    }
1469921a55d8Smrg
1470921a55d8Smrg    /* Vertex buffer setup */
1471921a55d8Smrg    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1472921a55d8Smrg    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
1473921a55d8Smrg    vtx_res.vtx_size_dw     = vtx_size / 4;
1474921a55d8Smrg    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
147543df4709Smrg    vtx_res.vb_addr         = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
1476921a55d8Smrg    vtx_res.bo              = accel_state->vbo.vb_bo;
1477921a55d8Smrg    vtx_res.dst_sel_x       = SQ_SEL_X;
1478921a55d8Smrg    vtx_res.dst_sel_y       = SQ_SEL_Y;
1479921a55d8Smrg    vtx_res.dst_sel_z       = SQ_SEL_Z;
1480921a55d8Smrg    vtx_res.dst_sel_w       = SQ_SEL_W;
1481b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1482b13dfe66Smrg    vtx_res.endian          = SQ_ENDIAN_8IN32;
1483b13dfe66Smrg#endif
1484921a55d8Smrg    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1485921a55d8Smrg
1486921a55d8Smrg    /* Draw */
1487921a55d8Smrg    draw_conf.prim_type          = DI_PT_RECTLIST;
1488921a55d8Smrg    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1489921a55d8Smrg    draw_conf.num_instances      = 1;
1490921a55d8Smrg    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1491921a55d8Smrg    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1492921a55d8Smrg
1493921a55d8Smrg    evergreen_draw_auto(pScrn, &draw_conf);
1494921a55d8Smrg
1495921a55d8Smrg    /* sync dst surface */
1496921a55d8Smrg    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
149743df4709Smrg				  accel_state->dst_size, accel_state->dst_obj.offset,
1498921a55d8Smrg				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1499921a55d8Smrg
1500921a55d8Smrg    accel_state->vbo.vb_start_op = -1;
1501921a55d8Smrg    accel_state->cbuf.vb_start_op = -1;
1502921a55d8Smrg    accel_state->ib_reset_op = 0;
1503921a55d8Smrg
1504921a55d8Smrg}
1505921a55d8Smrg
150643df4709Smrg#endif
1507