evergreen_accel.c revision 921a55d8
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
30#ifdef XF86DRM_MODE
31
32#include "xf86.h"
33
34#include <errno.h>
35
36#include "radeon.h"
37#include "evergreen_shader.h"
38#include "radeon_reg.h"
39#include "evergreen_reg.h"
40#include "evergreen_state.h"
41
42#include "radeon_drm.h"
43#include "radeon_vbo.h"
44#include "radeon_exa_shared.h"
45
46void
47evergreen_start_3d(ScrnInfoPtr pScrn)
48{
49    RADEONInfoPtr info = RADEONPTR(pScrn);
50
51    BEGIN_BATCH(3);
52    PACK3(IT_CONTEXT_CONTROL, 2);
53    E32(0x80000000);
54    E32(0x80000000);
55    END_BATCH();
56
57}
58
59/*
60 * Setup of functional groups
61 */
62
63// asic stack/thread/gpr limits - need to query the drm
64static void
65evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
66{
67    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
68    uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
69    uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
70    RADEONInfoPtr info = RADEONPTR(pScrn);
71
72    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
73	(info->ChipFamily == CHIP_FAMILY_PALM) ||
74	(info->ChipFamily == CHIP_FAMILY_CAICOS))
75	sq_config = 0;
76    else
77	sq_config = VC_ENABLE_bit;
78
79    sq_config |= (EXPORT_SRC_C_bit |
80		  (sq_conf->cs_prio << CS_PRIO_shift) |
81		  (sq_conf->ls_prio << LS_PRIO_shift) |
82		  (sq_conf->hs_prio << HS_PRIO_shift) |
83		  (sq_conf->ps_prio << PS_PRIO_shift) |
84		  (sq_conf->vs_prio << VS_PRIO_shift) |
85		  (sq_conf->gs_prio << GS_PRIO_shift) |
86		  (sq_conf->es_prio << ES_PRIO_shift));
87
88    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
89			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
90			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
91    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
92			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
93    sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
94			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
95
96    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
97			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
98			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
99			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
100    sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
101				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
102
103    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
104				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
105
106    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
107				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
108
109    sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
110				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
111
112    BEGIN_BATCH(16);
113    /* disable dyn gprs */
114    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
115    PACK0(SQ_CONFIG, 4);
116    E32(sq_config);
117    E32(sq_gpr_resource_mgmt_1);
118    E32(sq_gpr_resource_mgmt_2);
119    E32(sq_gpr_resource_mgmt_3);
120    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
121    E32(sq_thread_resource_mgmt);
122    E32(sq_thread_resource_mgmt_2);
123    E32(sq_stack_resource_mgmt_1);
124    E32(sq_stack_resource_mgmt_2);
125    E32(sq_stack_resource_mgmt_3);
126    END_BATCH();
127}
128
129void
130evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
131{
132    uint32_t cb_color_info, cb_color_attrib, cb_color_dim;
133    int pitch, slice, h;
134    RADEONInfoPtr info = RADEONPTR(pScrn);
135
136    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
137		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
138		     (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
139		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
140		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
141		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
142		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
143    if (cb_conf->blend_clamp)
144	cb_color_info |= BLEND_CLAMP_bit;
145    if (cb_conf->fast_clear)
146	cb_color_info |= FAST_CLEAR_bit;
147    if (cb_conf->compression)
148	cb_color_info |= COMPRESSION_bit;
149    if (cb_conf->blend_bypass)
150	cb_color_info |= BLEND_BYPASS_bit;
151    if (cb_conf->simple_float)
152	cb_color_info |= SIMPLE_FLOAT_bit;
153    if (cb_conf->round_mode)
154	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
155    if (cb_conf->tile_compact)
156	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
157    if (cb_conf->rat)
158	cb_color_info |= RAT_bit;
159
160    /* bit 4 needs to be set for linear and depth/stencil surfaces */
161    cb_color_attrib = CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
162
163    pitch = (cb_conf->w / 8) - 1;
164    h = RADEON_ALIGN(cb_conf->h, 8);
165    slice = ((cb_conf->w * h) / 64) - 1;
166
167    switch (cb_conf->resource_type) {
168    case BUFFER:
169	/* number of elements in the surface */
170	cb_color_dim = pitch * slice;
171	break;
172    default:
173	/* w/h of the surface */
174	cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) |
175			((cb_conf->h - 1) << HEIGHT_MAX_shift));
176	break;
177    }
178
179    BEGIN_BATCH(3 + 2);
180    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
181    RELOC_BATCH(cb_conf->bo, 0, domain);
182    END_BATCH();
183
184    /* Set CMASK & FMASK buffer to the offset of color buffer as
185     * we don't use those this shouldn't cause any issue and we
186     * then have a valid cmd stream
187     */
188    BEGIN_BATCH(3 + 2);
189    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
190    RELOC_BATCH(cb_conf->bo, 0, domain);
191    END_BATCH();
192    BEGIN_BATCH(3 + 2);
193    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
194    RELOC_BATCH(cb_conf->bo, 0, domain);
195    END_BATCH();
196
197    /* tiling config */
198    BEGIN_BATCH(3 + 2);
199    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
200    RELOC_BATCH(cb_conf->bo, 0, domain);
201    END_BATCH();
202    BEGIN_BATCH(3 + 2);
203    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
204    RELOC_BATCH(cb_conf->bo, 0, domain);
205    END_BATCH();
206
207    BEGIN_BATCH(24);
208    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
209    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
210    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
211    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
212    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
213    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
214    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
215    E32(0);
216    E32(0);
217    E32(0);
218    E32(0);
219    END_BATCH();
220}
221
222static void
223evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
224			      uint32_t size, uint64_t mc_addr,
225			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
226{
227    RADEONInfoPtr info = RADEONPTR(pScrn);
228    uint32_t cp_coher_size;
229    if (size == 0xffffffff)
230	cp_coher_size = 0xffffffff;
231    else
232	cp_coher_size = ((size + 255) >> 8);
233
234    BEGIN_BATCH(5 + 2);
235    PACK3(IT_SURFACE_SYNC, 4);
236    E32(sync_type);
237    E32(cp_coher_size);
238    E32((mc_addr >> 8));
239    E32(10); /* poll interval */
240    RELOC_BATCH(bo, rdomains, wdomain);
241    END_BATCH();
242}
243
244/* inserts a wait for vline in the command stream */
245void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
246				  xf86CrtcPtr crtc, int start, int stop)
247{
248    RADEONInfoPtr  info = RADEONPTR(pScrn);
249    drmmode_crtc_private_ptr drmmode_crtc;
250    uint32_t offset;
251
252    if (!crtc)
253        return;
254
255    drmmode_crtc = crtc->driver_private;
256
257    if (stop < start)
258        return;
259
260    if (!crtc->enabled)
261        return;
262
263    if (info->cs) {
264        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
265	    return;
266    } else {
267#ifdef USE_EXA
268	if (info->useEXA)
269	    offset = exaGetPixmapOffset(pPix);
270	else
271#endif
272	    offset = pPix->devPrivate.ptr - info->FB;
273
274	/* if drawing to front buffer */
275	if (offset != 0)
276	    return;
277    }
278
279    start = max(start, 0);
280    stop = min(stop, crtc->mode.VDisplay);
281
282    if (start > crtc->mode.VDisplay)
283        return;
284
285    BEGIN_BATCH(11);
286    /* set the VLINE range */
287    EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
288	 (start << EVERGREEN_VLINE_START_SHIFT) |
289	 (stop << EVERGREEN_VLINE_END_SHIFT));
290
291    /* tell the CP to poll the VLINE state register */
292    PACK3(IT_WAIT_REG_MEM, 6);
293    E32(IT_WAIT_REG | IT_WAIT_EQ);
294    E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
295    E32(0);
296    E32(0);                          // Ref value
297    E32(EVERGREEN_VLINE_STAT);    // Mask
298    E32(10);                         // Wait interval
299    /* add crtc reloc */
300    PACK3(IT_NOP, 1);
301    E32(drmmode_crtc->mode_crtc->crtc_id);
302    END_BATCH();
303}
304
305void
306evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
307{
308    RADEONInfoPtr info = RADEONPTR(pScrn);
309    uint32_t sq_pgm_resources;
310
311    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
312			(fs_conf->stack_size << STACK_SIZE_shift));
313
314    if (fs_conf->dx10_clamp)
315	sq_pgm_resources |= DX10_CLAMP_bit;
316
317    BEGIN_BATCH(3 + 2);
318    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
319    RELOC_BATCH(fs_conf->bo, domain, 0);
320    END_BATCH();
321
322    BEGIN_BATCH(3);
323    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
324    END_BATCH();
325}
326
327void
328evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
329{
330    RADEONInfoPtr info = RADEONPTR(pScrn);
331    uint32_t sq_pgm_resources, sq_pgm_resources_2;
332
333    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
334			(vs_conf->stack_size << STACK_SIZE_shift));
335
336    if (vs_conf->dx10_clamp)
337	sq_pgm_resources |= DX10_CLAMP_bit;
338    if (vs_conf->uncached_first_inst)
339	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
340
341    sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
342			  (vs_conf->double_round << DOUBLE_ROUND_shift));
343
344    if (vs_conf->allow_sdi)
345	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
346    if (vs_conf->allow_sd0)
347	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
348    if (vs_conf->allow_ddi)
349	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
350    if (vs_conf->allow_ddo)
351	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
352
353    /* flush SQ cache */
354    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
355				  vs_conf->shader_size, vs_conf->shader_addr,
356				  vs_conf->bo, domain, 0);
357
358    BEGIN_BATCH(3 + 2);
359    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
360    RELOC_BATCH(vs_conf->bo, domain, 0);
361    END_BATCH();
362
363    BEGIN_BATCH(4);
364    PACK0(SQ_PGM_RESOURCES_VS, 2);
365    E32(sq_pgm_resources);
366    E32(sq_pgm_resources_2);
367    END_BATCH();
368}
369
370void
371evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
372{
373    RADEONInfoPtr info = RADEONPTR(pScrn);
374    uint32_t sq_pgm_resources, sq_pgm_resources_2;
375
376    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
377			(ps_conf->stack_size << STACK_SIZE_shift));
378
379    if (ps_conf->dx10_clamp)
380	sq_pgm_resources |= DX10_CLAMP_bit;
381    if (ps_conf->uncached_first_inst)
382	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
383    if (ps_conf->clamp_consts)
384	sq_pgm_resources |= CLAMP_CONSTS_bit;
385
386    sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
387			  (ps_conf->double_round << DOUBLE_ROUND_shift));
388
389    if (ps_conf->allow_sdi)
390	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
391    if (ps_conf->allow_sd0)
392	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
393    if (ps_conf->allow_ddi)
394	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
395    if (ps_conf->allow_ddo)
396	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
397
398    /* flush SQ cache */
399    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
400				  ps_conf->shader_size, ps_conf->shader_addr,
401				  ps_conf->bo, domain, 0);
402
403    BEGIN_BATCH(3 + 2);
404    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
405    RELOC_BATCH(ps_conf->bo, domain, 0);
406    END_BATCH();
407
408    BEGIN_BATCH(5);
409    PACK0(SQ_PGM_RESOURCES_PS, 3);
410    E32(sq_pgm_resources);
411    E32(sq_pgm_resources_2);
412    E32(ps_conf->export_mode);
413    END_BATCH();
414}
415
416void
417evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
418{
419    RADEONInfoPtr info = RADEONPTR(pScrn);
420    /* size reg is units of 16 consts (4 dwords each) */
421    uint32_t size = const_conf->size_bytes >> 8;
422
423    if (size == 0)
424	size = 1;
425
426    /* flush SQ cache */
427    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
428				  const_conf->size_bytes, const_conf->const_addr,
429				  const_conf->bo, domain, 0);
430
431    switch (const_conf->type) {
432    case SHADER_TYPE_VS:
433	BEGIN_BATCH(3);
434	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
435	END_BATCH();
436	BEGIN_BATCH(3 + 2);
437	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
438	RELOC_BATCH(const_conf->bo, domain, 0);
439	END_BATCH();
440	break;
441    case SHADER_TYPE_PS:
442	BEGIN_BATCH(3);
443	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
444	END_BATCH();
445	BEGIN_BATCH(3 + 2);
446	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
447	RELOC_BATCH(const_conf->bo, domain, 0);
448	END_BATCH();
449	break;
450    default:
451	ErrorF("Unsupported const type %d\n", const_conf->type);
452	break;
453    }
454
455}
456
457void
458evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
459{
460    RADEONInfoPtr info = RADEONPTR(pScrn);
461    /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
462     * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
463     */
464    BEGIN_BATCH(3);
465    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
466    END_BATCH();
467}
468
469static void
470evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
471{
472    RADEONInfoPtr info = RADEONPTR(pScrn);
473    struct radeon_accel_state *accel_state = info->accel_state;
474    uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
475
476    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
477			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
478			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
479			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
480			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
481    if (res->clamp_x)
482	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
483
484    if (res->format_comp_all)
485	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
486
487    if (res->srf_mode_all)
488	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
489
490    sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
491			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
492			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
493			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
494
495    if (res->uncached)
496	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
497
498    /* XXX ??? */
499    sq_vtx_constant_word4 = 0;
500
501    /* flush vertex cache */
502    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
503	(info->ChipFamily == CHIP_FAMILY_PALM) ||
504	(info->ChipFamily == CHIP_FAMILY_CAICOS))
505	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
506				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
507				      res->bo,
508				      domain, 0);
509    else
510	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
511				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
512				      res->bo,
513				      domain, 0);
514
515    BEGIN_BATCH(10 + 2);
516    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
517    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
518    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
519    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
520    E32(sq_vtx_constant_word3);		// 3: swizzles
521    E32(sq_vtx_constant_word4);		// 4: num elements
522    E32(0);							// 5: n/a
523    E32(0);							// 6: n/a
524    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
525    RELOC_BATCH(res->bo, domain, 0);
526    END_BATCH();
527}
528
529void
530evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
531{
532    RADEONInfoPtr info = RADEONPTR(pScrn);
533    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
534    uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
535
536    sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
537
538    if (tex_res->w)
539	sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
540				  ((tex_res->w - 1) << TEX_WIDTH_shift));
541
542    if (tex_res->tile_type)
543	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
544
545    sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
546
547    if (tex_res->h)
548	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
549    if (tex_res->depth)
550	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
551
552    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
553			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
554			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
555			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
556			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
557			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
558			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
559			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
560			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
561			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
562			     (tex_res->base_level << BASE_LEVEL_shift));
563
564    if (tex_res->srf_mode_all)
565	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
566    if (tex_res->force_degamma)
567	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
568
569    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
570			     (tex_res->base_array << BASE_ARRAY_shift) |
571			     (tex_res->last_array << LAST_ARRAY_shift));
572
573    sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
574			     (tex_res->perf_modulation << PERF_MODULATION_shift));
575
576    if (tex_res->interlaced)
577	sq_tex_resource_word6 |= INTERLACED_bit;
578
579    sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
580			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
581
582    /* flush texture cache */
583    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
584				  tex_res->size, tex_res->base,
585				  tex_res->bo, domain, 0);
586
587    BEGIN_BATCH(10 + 4);
588    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
589    E32(sq_tex_resource_word0);
590    E32(sq_tex_resource_word1);
591    E32(((tex_res->base) >> 8));
592    E32(((tex_res->mip_base) >> 8));
593    E32(sq_tex_resource_word4);
594    E32(sq_tex_resource_word5);
595    E32(sq_tex_resource_word6);
596    E32(sq_tex_resource_word7);
597    RELOC_BATCH(tex_res->bo, domain, 0);
598    RELOC_BATCH(tex_res->mip_bo, domain, 0);
599    END_BATCH();
600}
601
602void
603evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
604{
605    RADEONInfoPtr info = RADEONPTR(pScrn);
606    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
607
608    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
609			    (s->clamp_y       << CLAMP_Y_shift)					|
610			    (s->clamp_z       << CLAMP_Z_shift)					|
611			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
612			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
613			    (s->z_filter      << Z_FILTER_shift)	|
614			    (s->mip_filter    << MIP_FILTER_shift)				|
615			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
616			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
617			    (s->chroma_key    << CHROMA_KEY_shift));
618
619    sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
620			    (s->max_lod       << MAX_LOD_shift)					|
621			    (s->perf_mip      << PERF_MIP_shift)	|
622			    (s->perf_z        << PERF_Z_shift));
623
624
625    sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
626			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
627
628    if (s->mc_coord_truncate)
629	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
630    if (s->force_degamma)
631	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
632    if (s->truncate_coord)
633	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
634    if (s->disable_cube_wrap)
635	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
636    if (s->type)
637	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
638
639    BEGIN_BATCH(5);
640    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
641    E32(sq_tex_sampler_word0);
642    E32(sq_tex_sampler_word1);
643    E32(sq_tex_sampler_word2);
644    END_BATCH();
645}
646
647//XXX deal with clip offsets in clip setup
648void
649evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
650{
651    RADEONInfoPtr info = RADEONPTR(pScrn);
652
653    BEGIN_BATCH(4);
654    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
655    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
656	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
657    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
658	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
659    END_BATCH();
660}
661
662void
663evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
664{
665    RADEONInfoPtr info = RADEONPTR(pScrn);
666
667    BEGIN_BATCH(4);
668    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
669    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
670	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
671	 WINDOW_OFFSET_DISABLE_bit));
672    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
673	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
674    END_BATCH();
675}
676
677void
678evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
679{
680    RADEONInfoPtr info = RADEONPTR(pScrn);
681
682    BEGIN_BATCH(4);
683    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
684    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
685	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
686	 WINDOW_OFFSET_DISABLE_bit));
687    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
688	 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
689    END_BATCH();
690}
691
692void
693evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
694{
695    RADEONInfoPtr info = RADEONPTR(pScrn);
696
697    BEGIN_BATCH(4);
698    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
699    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
700	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
701	 WINDOW_OFFSET_DISABLE_bit));
702    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
703	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
704    END_BATCH();
705}
706
707void
708evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
709{
710    RADEONInfoPtr info = RADEONPTR(pScrn);
711
712    BEGIN_BATCH(4);
713    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
714    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
715	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
716    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
717	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
718    END_BATCH();
719}
720
721/*
722 * Setup of default state
723 */
724
725void
726evergreen_set_default_state(ScrnInfoPtr pScrn)
727{
728    tex_resource_t tex_res;
729    shader_config_t fs_conf;
730    sq_config_t sq_conf;
731    int i;
732    RADEONInfoPtr info = RADEONPTR(pScrn);
733    struct radeon_accel_state *accel_state = info->accel_state;
734
735    if (accel_state->XInited3D)
736	return;
737
738    memset(&tex_res, 0, sizeof(tex_resource_t));
739    memset(&fs_conf, 0, sizeof(shader_config_t));
740
741    accel_state->XInited3D = TRUE;
742
743    evergreen_start_3d(pScrn);
744
745    /* SQ */
746    sq_conf.ps_prio = 0;
747    sq_conf.vs_prio = 1;
748    sq_conf.gs_prio = 2;
749    sq_conf.es_prio = 3;
750    sq_conf.hs_prio = 0;
751    sq_conf.ls_prio = 0;
752    sq_conf.cs_prio = 0;
753
754    switch (info->ChipFamily) {
755    case CHIP_FAMILY_CEDAR:
756    default:
757	sq_conf.num_ps_gprs = 93;
758	sq_conf.num_vs_gprs = 46;
759	sq_conf.num_temp_gprs = 4;
760	sq_conf.num_gs_gprs = 31;
761	sq_conf.num_es_gprs = 31;
762	sq_conf.num_hs_gprs = 23;
763	sq_conf.num_ls_gprs = 23;
764	sq_conf.num_ps_threads = 96;
765	sq_conf.num_vs_threads = 16;
766	sq_conf.num_gs_threads = 16;
767	sq_conf.num_es_threads = 16;
768	sq_conf.num_hs_threads = 16;
769	sq_conf.num_ls_threads = 16;
770	sq_conf.num_ps_stack_entries = 42;
771	sq_conf.num_vs_stack_entries = 42;
772	sq_conf.num_gs_stack_entries = 42;
773	sq_conf.num_es_stack_entries = 42;
774	sq_conf.num_hs_stack_entries = 42;
775	sq_conf.num_ls_stack_entries = 42;
776	break;
777    case CHIP_FAMILY_REDWOOD:
778	sq_conf.num_ps_gprs = 93;
779	sq_conf.num_vs_gprs = 46;
780	sq_conf.num_temp_gprs = 4;
781	sq_conf.num_gs_gprs = 31;
782	sq_conf.num_es_gprs = 31;
783	sq_conf.num_hs_gprs = 23;
784	sq_conf.num_ls_gprs = 23;
785	sq_conf.num_ps_threads = 128;
786	sq_conf.num_vs_threads = 20;
787	sq_conf.num_gs_threads = 20;
788	sq_conf.num_es_threads = 20;
789	sq_conf.num_hs_threads = 20;
790	sq_conf.num_ls_threads = 20;
791	sq_conf.num_ps_stack_entries = 42;
792	sq_conf.num_vs_stack_entries = 42;
793	sq_conf.num_gs_stack_entries = 42;
794	sq_conf.num_es_stack_entries = 42;
795	sq_conf.num_hs_stack_entries = 42;
796	sq_conf.num_ls_stack_entries = 42;
797	break;
798    case CHIP_FAMILY_JUNIPER:
799	sq_conf.num_ps_gprs = 93;
800	sq_conf.num_vs_gprs = 46;
801	sq_conf.num_temp_gprs = 4;
802	sq_conf.num_gs_gprs = 31;
803	sq_conf.num_es_gprs = 31;
804	sq_conf.num_hs_gprs = 23;
805	sq_conf.num_ls_gprs = 23;
806	sq_conf.num_ps_threads = 128;
807	sq_conf.num_vs_threads = 20;
808	sq_conf.num_gs_threads = 20;
809	sq_conf.num_es_threads = 20;
810	sq_conf.num_hs_threads = 20;
811	sq_conf.num_ls_threads = 20;
812	sq_conf.num_ps_stack_entries = 85;
813	sq_conf.num_vs_stack_entries = 85;
814	sq_conf.num_gs_stack_entries = 85;
815	sq_conf.num_es_stack_entries = 85;
816	sq_conf.num_hs_stack_entries = 85;
817	sq_conf.num_ls_stack_entries = 85;
818	break;
819    case CHIP_FAMILY_CYPRESS:
820    case CHIP_FAMILY_HEMLOCK:
821	sq_conf.num_ps_gprs = 93;
822	sq_conf.num_vs_gprs = 46;
823	sq_conf.num_temp_gprs = 4;
824	sq_conf.num_gs_gprs = 31;
825	sq_conf.num_es_gprs = 31;
826	sq_conf.num_hs_gprs = 23;
827	sq_conf.num_ls_gprs = 23;
828	sq_conf.num_ps_threads = 128;
829	sq_conf.num_vs_threads = 20;
830	sq_conf.num_gs_threads = 20;
831	sq_conf.num_es_threads = 20;
832	sq_conf.num_hs_threads = 20;
833	sq_conf.num_ls_threads = 20;
834	sq_conf.num_ps_stack_entries = 85;
835	sq_conf.num_vs_stack_entries = 85;
836	sq_conf.num_gs_stack_entries = 85;
837	sq_conf.num_es_stack_entries = 85;
838	sq_conf.num_hs_stack_entries = 85;
839	sq_conf.num_ls_stack_entries = 85;
840	break;
841    case CHIP_FAMILY_PALM:
842	sq_conf.num_ps_gprs = 93;
843	sq_conf.num_vs_gprs = 46;
844	sq_conf.num_temp_gprs = 4;
845	sq_conf.num_gs_gprs = 31;
846	sq_conf.num_es_gprs = 31;
847	sq_conf.num_hs_gprs = 23;
848	sq_conf.num_ls_gprs = 23;
849	sq_conf.num_ps_threads = 96;
850	sq_conf.num_vs_threads = 16;
851	sq_conf.num_gs_threads = 16;
852	sq_conf.num_es_threads = 16;
853	sq_conf.num_hs_threads = 16;
854	sq_conf.num_ls_threads = 16;
855	sq_conf.num_ps_stack_entries = 42;
856	sq_conf.num_vs_stack_entries = 42;
857	sq_conf.num_gs_stack_entries = 42;
858	sq_conf.num_es_stack_entries = 42;
859	sq_conf.num_hs_stack_entries = 42;
860	sq_conf.num_ls_stack_entries = 42;
861	break;
862    case CHIP_FAMILY_BARTS:
863	sq_conf.num_ps_gprs = 93;
864	sq_conf.num_vs_gprs = 46;
865	sq_conf.num_temp_gprs = 4;
866	sq_conf.num_gs_gprs = 31;
867	sq_conf.num_es_gprs = 31;
868	sq_conf.num_hs_gprs = 23;
869	sq_conf.num_ls_gprs = 23;
870	sq_conf.num_ps_threads = 128;
871	sq_conf.num_vs_threads = 20;
872	sq_conf.num_gs_threads = 20;
873	sq_conf.num_es_threads = 20;
874	sq_conf.num_hs_threads = 20;
875	sq_conf.num_ls_threads = 20;
876	sq_conf.num_ps_stack_entries = 85;
877	sq_conf.num_vs_stack_entries = 85;
878	sq_conf.num_gs_stack_entries = 85;
879	sq_conf.num_es_stack_entries = 85;
880	sq_conf.num_hs_stack_entries = 85;
881	sq_conf.num_ls_stack_entries = 85;
882	break;
883    case CHIP_FAMILY_TURKS:
884	sq_conf.num_ps_gprs = 93;
885	sq_conf.num_vs_gprs = 46;
886	sq_conf.num_temp_gprs = 4;
887	sq_conf.num_gs_gprs = 31;
888	sq_conf.num_es_gprs = 31;
889	sq_conf.num_hs_gprs = 23;
890	sq_conf.num_ls_gprs = 23;
891	sq_conf.num_ps_threads = 128;
892	sq_conf.num_vs_threads = 20;
893	sq_conf.num_gs_threads = 20;
894	sq_conf.num_es_threads = 20;
895	sq_conf.num_hs_threads = 20;
896	sq_conf.num_ls_threads = 20;
897	sq_conf.num_ps_stack_entries = 42;
898	sq_conf.num_vs_stack_entries = 42;
899	sq_conf.num_gs_stack_entries = 42;
900	sq_conf.num_es_stack_entries = 42;
901	sq_conf.num_hs_stack_entries = 42;
902	sq_conf.num_ls_stack_entries = 42;
903	break;
904    case CHIP_FAMILY_CAICOS:
905	sq_conf.num_ps_gprs = 93;
906	sq_conf.num_vs_gprs = 46;
907	sq_conf.num_temp_gprs = 4;
908	sq_conf.num_gs_gprs = 31;
909	sq_conf.num_es_gprs = 31;
910	sq_conf.num_hs_gprs = 23;
911	sq_conf.num_ls_gprs = 23;
912	sq_conf.num_ps_threads = 128;
913	sq_conf.num_vs_threads = 10;
914	sq_conf.num_gs_threads = 10;
915	sq_conf.num_es_threads = 10;
916	sq_conf.num_hs_threads = 10;
917	sq_conf.num_ls_threads = 10;
918	sq_conf.num_ps_stack_entries = 42;
919	sq_conf.num_vs_stack_entries = 42;
920	sq_conf.num_gs_stack_entries = 42;
921	sq_conf.num_es_stack_entries = 42;
922	sq_conf.num_hs_stack_entries = 42;
923	sq_conf.num_ls_stack_entries = 42;
924	break;
925    }
926
927    evergreen_sq_setup(pScrn, &sq_conf);
928
929    BEGIN_BATCH(24);
930    EREG(SQ_LDS_ALLOC_PS, 0);
931    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
932
933    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
934    E32(0);
935    E32(0);
936    E32(0);
937    E32(0);
938    E32(0);
939    E32(0);
940
941    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
942    E32(0);
943    E32(0);
944    E32(0);
945    E32(0);
946
947    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
948    E32(0);
949    E32(0);
950    END_BATCH();
951
952    /* DB */
953    BEGIN_BATCH(3 + 2);
954    EREG(DB_Z_INFO,                           0);
955    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
956    END_BATCH();
957
958    BEGIN_BATCH(3 + 2);
959    EREG(DB_STENCIL_INFO,                     0);
960    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
961    END_BATCH();
962
963    BEGIN_BATCH(3 + 2);
964    EREG(DB_HTILE_DATA_BASE,                    0);
965    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
966    END_BATCH();
967
968    BEGIN_BATCH(49);
969    EREG(DB_DEPTH_CONTROL,                    0);
970
971    PACK0(PA_SC_VPORT_ZMIN_0, 2);
972    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
973    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
974
975    PACK0(DB_RENDER_CONTROL, 5);
976    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
977    E32(0); // DB_COUNT_CONTROL
978    E32(0); // DB_DEPTH_VIEW
979    E32(0x2a); // DB_RENDER_OVERRIDE
980    E32(0); // DB_RENDER_OVERRIDE2
981
982    PACK0(DB_STENCIL_CLEAR, 2);
983    E32(0); // DB_STENCIL_CLEAR
984    E32(0); // DB_DEPTH_CLEAR
985
986    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
987					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
988					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
989					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
990
991    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
992			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
993
994    // SX
995    EREG(SX_MISC,               0);
996
997    // CB
998    PACK0(SX_ALPHA_TEST_CONTROL, 5);
999    E32(0); // SX_ALPHA_TEST_CONTROL
1000    E32(0x00000000); //CB_BLEND_RED
1001    E32(0x00000000); //CB_BLEND_GREEN
1002    E32(0x00000000); //CB_BLEND_BLUE
1003    E32(0x00000000); //CB_BLEND_ALPHA
1004
1005    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
1006
1007    // SC
1008    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1009					       (0 << WINDOW_Y_OFFSET_shift)));
1010    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1011    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
1012    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1013    END_BATCH();
1014
1015    /* clip boolean is set to always visible -> doesn't matter */
1016    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1017	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1018
1019    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1020	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1021
1022    BEGIN_BATCH(50);
1023    PACK0(PA_SC_MODE_CNTL_0, 2);
1024    E32(0); // PA_SC_MODE_CNTL_0
1025    E32(0); // PA_SC_MODE_CNTL_1
1026
1027    PACK0(PA_SC_LINE_CNTL, 16);
1028    E32(0); // PA_SC_LINE_CNTL
1029    E32(0); // PA_SC_AA_CONFIG
1030    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1031	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1032    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1033    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1034    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1035    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1036    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1037    E32(0);
1038    E32(0);
1039    E32(0);
1040    E32(0);
1041    E32(0);
1042    E32(0);
1043    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1044    E32(0xFFFFFFFF); // PA_SC_AA_MASK
1045
1046    // CL
1047    PACK0(PA_CL_CLIP_CNTL, 8);
1048    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1049    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1050    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1051    E32(0); // PA_CL_VS_OUT_CNTL
1052    E32(0); // PA_CL_NANINF_CNTL
1053    E32(0); // PA_SU_LINE_STIPPLE_CNTL
1054    E32(0); // PA_SU_LINE_STIPPLE_SCALE
1055    E32(0); // PA_SU_PRIM_FILTER_CNTL
1056
1057    // SU
1058    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1059    E32(0);
1060    E32(0);
1061    E32(0);
1062    E32(0);
1063    E32(0);
1064    E32(0);
1065
1066    PACK0(SPI_INPUT_Z, 8);
1067    E32(0); // SPI_INPUT_Z
1068    E32(0); // SPI_FOG_CNTL
1069    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1070    E32(0); // SPI_PS_IN_CONTROL_2
1071    E32(0);
1072    E32(0);
1073    E32(0);
1074    E32(0);
1075    END_BATCH();
1076
1077    // clear FS
1078    fs_conf.bo = accel_state->shaders_bo;
1079    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1080
1081    // VGT
1082    BEGIN_BATCH(46);
1083
1084    PACK0(VGT_MAX_VTX_INDX, 4);
1085    E32(0xffffff);
1086    E32(0);
1087    E32(0);
1088    E32(0);
1089
1090    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1091    E32(0);
1092    E32(0);
1093
1094    PACK0(VGT_REUSE_OFF, 2);
1095    E32(0);
1096    E32(0);
1097
1098    PACK0(PA_SU_POINT_SIZE, 17);
1099    E32(0); // PA_SU_POINT_SIZE
1100    E32(0); // PA_SU_POINT_MINMAX
1101    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1102    E32(0); // PA_SC_LINE_STIPPLE
1103    E32(0); // VGT_OUTPUT_PATH_CNTL
1104    E32(0); // VGT_HOS_CNTL
1105    E32(0);
1106    E32(0);
1107    E32(0);
1108    E32(0);
1109    E32(0);
1110    E32(0);
1111    E32(0);
1112    E32(0);
1113    E32(0);
1114    E32(0);
1115    E32(0); // VGT_GS_MODE
1116
1117    EREG(VGT_PRIMITIVEID_EN,                  0);
1118    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1119    EREG(VGT_SHADER_STAGES_EN,          0);
1120
1121    PACK0(VGT_STRMOUT_CONFIG, 2);
1122    E32(0);
1123    E32(0);
1124    END_BATCH();
1125}
1126
1127
1128/*
1129 * Commands
1130 */
1131
1132void
1133evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1134{
1135    RADEONInfoPtr info = RADEONPTR(pScrn);
1136
1137    BEGIN_BATCH(10);
1138    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1139    PACK3(IT_INDEX_TYPE, 1);
1140    E32(draw_conf->index_type);
1141    PACK3(IT_NUM_INSTANCES, 1);
1142    E32(draw_conf->num_instances);
1143    PACK3(IT_DRAW_INDEX_AUTO, 2);
1144    E32(draw_conf->num_indices);
1145    E32(draw_conf->vgt_draw_initiator);
1146    END_BATCH();
1147}
1148
1149void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1150{
1151    RADEONInfoPtr info = RADEONPTR(pScrn);
1152    struct radeon_accel_state *accel_state = info->accel_state;
1153    draw_config_t   draw_conf;
1154    vtx_resource_t  vtx_res;
1155
1156    if (accel_state->vbo.vb_start_op == -1)
1157      return;
1158
1159    CLEAR (draw_conf);
1160    CLEAR (vtx_res);
1161
1162    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1163	radeon_ib_discard(pScrn);
1164	radeon_cs_flush_indirect(pScrn);
1165	return;
1166    }
1167
1168    /* Vertex buffer setup */
1169    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1170    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
1171    vtx_res.vtx_size_dw     = vtx_size / 4;
1172    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1173    vtx_res.vb_addr         = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
1174    vtx_res.bo              = accel_state->vbo.vb_bo;
1175    vtx_res.dst_sel_x       = SQ_SEL_X;
1176    vtx_res.dst_sel_y       = SQ_SEL_Y;
1177    vtx_res.dst_sel_z       = SQ_SEL_Z;
1178    vtx_res.dst_sel_w       = SQ_SEL_W;
1179    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1180
1181    /* Draw */
1182    draw_conf.prim_type          = DI_PT_RECTLIST;
1183    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1184    draw_conf.num_instances      = 1;
1185    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1186    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1187
1188    evergreen_draw_auto(pScrn, &draw_conf);
1189
1190    /* sync dst surface */
1191    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1192				  accel_state->dst_size, accel_state->dst_obj.offset,
1193				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1194
1195    accel_state->vbo.vb_start_op = -1;
1196    accel_state->cbuf.vb_start_op = -1;
1197    accel_state->ib_reset_op = 0;
1198
1199}
1200
1201#endif
1202