evergreen_accel.c revision b13dfe66
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
30#ifdef XF86DRM_MODE
31
32#include "xf86.h"
33
34#include <errno.h>
35
36#include "radeon.h"
37#include "evergreen_shader.h"
38#include "radeon_reg.h"
39#include "evergreen_reg.h"
40#include "evergreen_state.h"
41
42#include "radeon_drm.h"
43#include "radeon_vbo.h"
44#include "radeon_exa_shared.h"
45
46static const uint32_t EVERGREEN_ROP[16] = {
47    RADEON_ROP3_ZERO, /* GXclear        */
48    RADEON_ROP3_DSa,  /* Gxand          */
49    RADEON_ROP3_SDna, /* GXandReverse   */
50    RADEON_ROP3_S,    /* GXcopy         */
51    RADEON_ROP3_DSna, /* GXandInverted  */
52    RADEON_ROP3_D,    /* GXnoop         */
53    RADEON_ROP3_DSx,  /* GXxor          */
54    RADEON_ROP3_DSo,  /* GXor           */
55    RADEON_ROP3_DSon, /* GXnor          */
56    RADEON_ROP3_DSxn, /* GXequiv        */
57    RADEON_ROP3_Dn,   /* GXinvert       */
58    RADEON_ROP3_SDno, /* GXorReverse    */
59    RADEON_ROP3_Sn,   /* GXcopyInverted */
60    RADEON_ROP3_DSno, /* GXorInverted   */
61    RADEON_ROP3_DSan, /* GXnand         */
62    RADEON_ROP3_ONE,  /* GXset          */
63};
64
65void
66evergreen_start_3d(ScrnInfoPtr pScrn)
67{
68    RADEONInfoPtr info = RADEONPTR(pScrn);
69
70    BEGIN_BATCH(3);
71    PACK3(IT_CONTEXT_CONTROL, 2);
72    E32(0x80000000);
73    E32(0x80000000);
74    END_BATCH();
75
76}
77
78/*
79 * Setup of functional groups
80 */
81
82// asic stack/thread/gpr limits - need to query the drm
83static void
84evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
85{
86    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
87    uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
88    uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
89    RADEONInfoPtr info = RADEONPTR(pScrn);
90
91    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
92	(info->ChipFamily == CHIP_FAMILY_PALM) ||
93	(info->ChipFamily == CHIP_FAMILY_CAICOS))
94	sq_config = 0;
95    else
96	sq_config = VC_ENABLE_bit;
97
98    sq_config |= (EXPORT_SRC_C_bit |
99		  (sq_conf->cs_prio << CS_PRIO_shift) |
100		  (sq_conf->ls_prio << LS_PRIO_shift) |
101		  (sq_conf->hs_prio << HS_PRIO_shift) |
102		  (sq_conf->ps_prio << PS_PRIO_shift) |
103		  (sq_conf->vs_prio << VS_PRIO_shift) |
104		  (sq_conf->gs_prio << GS_PRIO_shift) |
105		  (sq_conf->es_prio << ES_PRIO_shift));
106
107    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
108			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
109			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
110    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
111			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
112    sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
113			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
114
115    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
116			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
117			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
118			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
119    sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
120				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
121
122    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
123				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
124
125    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
126				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
127
128    sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
129				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
130
131    BEGIN_BATCH(16);
132    /* disable dyn gprs */
133    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
134    PACK0(SQ_CONFIG, 4);
135    E32(sq_config);
136    E32(sq_gpr_resource_mgmt_1);
137    E32(sq_gpr_resource_mgmt_2);
138    E32(sq_gpr_resource_mgmt_3);
139    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
140    E32(sq_thread_resource_mgmt);
141    E32(sq_thread_resource_mgmt_2);
142    E32(sq_stack_resource_mgmt_1);
143    E32(sq_stack_resource_mgmt_2);
144    E32(sq_stack_resource_mgmt_3);
145    END_BATCH();
146}
147
148void
149evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
150{
151    uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim;
152    int pitch, slice, h;
153    RADEONInfoPtr info = RADEONPTR(pScrn);
154
155    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
156		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
157		     (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
158		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
159		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
160		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
161		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
162    if (cb_conf->blend_clamp)
163	cb_color_info |= BLEND_CLAMP_bit;
164    if (cb_conf->fast_clear)
165	cb_color_info |= FAST_CLEAR_bit;
166    if (cb_conf->compression)
167	cb_color_info |= COMPRESSION_bit;
168    if (cb_conf->blend_bypass)
169	cb_color_info |= BLEND_BYPASS_bit;
170    if (cb_conf->simple_float)
171	cb_color_info |= SIMPLE_FLOAT_bit;
172    if (cb_conf->round_mode)
173	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
174    if (cb_conf->tile_compact)
175	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
176    if (cb_conf->rat)
177	cb_color_info |= RAT_bit;
178
179    /* bit 4 needs to be set for linear and depth/stencil surfaces */
180    if (cb_conf->non_disp_tiling)
181	cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
182
183    pitch = (cb_conf->w / 8) - 1;
184    h = RADEON_ALIGN(cb_conf->h, 8);
185    slice = ((cb_conf->w * h) / 64) - 1;
186
187    switch (cb_conf->resource_type) {
188    case BUFFER:
189	/* number of elements in the surface */
190	cb_color_dim = pitch * slice;
191	break;
192    default:
193	/* w/h of the surface */
194	cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) |
195			((cb_conf->h - 1) << HEIGHT_MAX_shift));
196	break;
197    }
198
199    BEGIN_BATCH(3 + 2);
200    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
201    RELOC_BATCH(cb_conf->bo, 0, domain);
202    END_BATCH();
203
204    /* Set CMASK & FMASK buffer to the offset of color buffer as
205     * we don't use those this shouldn't cause any issue and we
206     * then have a valid cmd stream
207     */
208    BEGIN_BATCH(3 + 2);
209    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
210    RELOC_BATCH(cb_conf->bo, 0, domain);
211    END_BATCH();
212    BEGIN_BATCH(3 + 2);
213    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
214    RELOC_BATCH(cb_conf->bo, 0, domain);
215    END_BATCH();
216
217    /* tiling config */
218    BEGIN_BATCH(3 + 2);
219    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
220    RELOC_BATCH(cb_conf->bo, 0, domain);
221    END_BATCH();
222    BEGIN_BATCH(3 + 2);
223    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
224    RELOC_BATCH(cb_conf->bo, 0, domain);
225    END_BATCH();
226
227    BEGIN_BATCH(33);
228    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
229    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
230    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
231    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
232    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
233    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
234    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
235    E32(0);
236    E32(0);
237    E32(0);
238    E32(0);
239    EREG(CB_TARGET_MASK,                      (cb_conf->pmask << TARGET0_ENABLE_shift));
240    EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[cb_conf->rop] |
241					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
242    EREG(CB_BLEND0_CONTROL,                   cb_conf->blendcntl);
243    END_BATCH();
244
245}
246
247static void
248evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
249			      uint32_t size, uint64_t mc_addr,
250			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
251{
252    RADEONInfoPtr info = RADEONPTR(pScrn);
253    uint32_t cp_coher_size;
254    if (size == 0xffffffff)
255	cp_coher_size = 0xffffffff;
256    else
257	cp_coher_size = ((size + 255) >> 8);
258
259    BEGIN_BATCH(5 + 2);
260    PACK3(IT_SURFACE_SYNC, 4);
261    E32(sync_type);
262    E32(cp_coher_size);
263    E32((mc_addr >> 8));
264    E32(10); /* poll interval */
265    RELOC_BATCH(bo, rdomains, wdomain);
266    END_BATCH();
267}
268
269/* inserts a wait for vline in the command stream */
270void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
271				  xf86CrtcPtr crtc, int start, int stop)
272{
273    RADEONInfoPtr  info = RADEONPTR(pScrn);
274    drmmode_crtc_private_ptr drmmode_crtc;
275    uint32_t offset;
276
277    if (!crtc)
278        return;
279
280    drmmode_crtc = crtc->driver_private;
281
282    if (stop < start)
283        return;
284
285    if (!crtc->enabled)
286        return;
287
288    if (info->cs) {
289        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
290	    return;
291    } else {
292#ifdef USE_EXA
293	if (info->useEXA)
294	    offset = exaGetPixmapOffset(pPix);
295	else
296#endif
297	    offset = pPix->devPrivate.ptr - info->FB;
298
299	/* if drawing to front buffer */
300	if (offset != 0)
301	    return;
302    }
303
304    start = max(start, 0);
305    stop = min(stop, crtc->mode.VDisplay);
306
307    if (start > crtc->mode.VDisplay)
308        return;
309
310    BEGIN_BATCH(11);
311    /* set the VLINE range */
312    EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
313	 (start << EVERGREEN_VLINE_START_SHIFT) |
314	 (stop << EVERGREEN_VLINE_END_SHIFT));
315
316    /* tell the CP to poll the VLINE state register */
317    PACK3(IT_WAIT_REG_MEM, 6);
318    E32(IT_WAIT_REG | IT_WAIT_EQ);
319    E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
320    E32(0);
321    E32(0);                          // Ref value
322    E32(EVERGREEN_VLINE_STAT);    // Mask
323    E32(10);                         // Wait interval
324    /* add crtc reloc */
325    PACK3(IT_NOP, 1);
326    E32(drmmode_crtc->mode_crtc->crtc_id);
327    END_BATCH();
328}
329
330void
331evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
332{
333    RADEONInfoPtr info = RADEONPTR(pScrn);
334
335    BEGIN_BATCH(8);
336    /* Interpolator setup */
337    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
338    PACK0(SPI_PS_IN_CONTROL_0, 3);
339    E32(((num_interp << NUM_INTERP_shift) |
340	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
341    E32(0); // SPI_PS_IN_CONTROL_1
342    E32(0); // SPI_INTERP_CONTROL_0
343    END_BATCH();
344}
345
346void
347evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
348{
349    RADEONInfoPtr info = RADEONPTR(pScrn);
350    uint32_t sq_pgm_resources;
351
352    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
353			(fs_conf->stack_size << STACK_SIZE_shift));
354
355    if (fs_conf->dx10_clamp)
356	sq_pgm_resources |= DX10_CLAMP_bit;
357
358    BEGIN_BATCH(3 + 2);
359    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
360    RELOC_BATCH(fs_conf->bo, domain, 0);
361    END_BATCH();
362
363    BEGIN_BATCH(3);
364    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
365    END_BATCH();
366}
367
368void
369evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
370{
371    RADEONInfoPtr info = RADEONPTR(pScrn);
372    uint32_t sq_pgm_resources, sq_pgm_resources_2;
373
374    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
375			(vs_conf->stack_size << STACK_SIZE_shift));
376
377    if (vs_conf->dx10_clamp)
378	sq_pgm_resources |= DX10_CLAMP_bit;
379    if (vs_conf->uncached_first_inst)
380	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
381
382    sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
383			  (vs_conf->double_round << DOUBLE_ROUND_shift));
384
385    if (vs_conf->allow_sdi)
386	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
387    if (vs_conf->allow_sd0)
388	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
389    if (vs_conf->allow_ddi)
390	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
391    if (vs_conf->allow_ddo)
392	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
393
394    /* flush SQ cache */
395    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
396				  vs_conf->shader_size, vs_conf->shader_addr,
397				  vs_conf->bo, domain, 0);
398
399    BEGIN_BATCH(3 + 2);
400    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
401    RELOC_BATCH(vs_conf->bo, domain, 0);
402    END_BATCH();
403
404    BEGIN_BATCH(4);
405    PACK0(SQ_PGM_RESOURCES_VS, 2);
406    E32(sq_pgm_resources);
407    E32(sq_pgm_resources_2);
408    END_BATCH();
409}
410
411void
412evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
413{
414    RADEONInfoPtr info = RADEONPTR(pScrn);
415    uint32_t sq_pgm_resources, sq_pgm_resources_2;
416
417    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
418			(ps_conf->stack_size << STACK_SIZE_shift));
419
420    if (ps_conf->dx10_clamp)
421	sq_pgm_resources |= DX10_CLAMP_bit;
422    if (ps_conf->uncached_first_inst)
423	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
424    if (ps_conf->clamp_consts)
425	sq_pgm_resources |= CLAMP_CONSTS_bit;
426
427    sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
428			  (ps_conf->double_round << DOUBLE_ROUND_shift));
429
430    if (ps_conf->allow_sdi)
431	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
432    if (ps_conf->allow_sd0)
433	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
434    if (ps_conf->allow_ddi)
435	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
436    if (ps_conf->allow_ddo)
437	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
438
439    /* flush SQ cache */
440    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
441				  ps_conf->shader_size, ps_conf->shader_addr,
442				  ps_conf->bo, domain, 0);
443
444    BEGIN_BATCH(3 + 2);
445    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
446    RELOC_BATCH(ps_conf->bo, domain, 0);
447    END_BATCH();
448
449    BEGIN_BATCH(5);
450    PACK0(SQ_PGM_RESOURCES_PS, 3);
451    E32(sq_pgm_resources);
452    E32(sq_pgm_resources_2);
453    E32(ps_conf->export_mode);
454    END_BATCH();
455}
456
457void
458evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
459{
460    RADEONInfoPtr info = RADEONPTR(pScrn);
461    /* size reg is units of 16 consts (4 dwords each) */
462    uint32_t size = const_conf->size_bytes >> 8;
463
464    if (size == 0)
465	size = 1;
466
467    /* flush SQ cache */
468    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
469				  const_conf->size_bytes, const_conf->const_addr,
470				  const_conf->bo, domain, 0);
471
472    switch (const_conf->type) {
473    case SHADER_TYPE_VS:
474	BEGIN_BATCH(3);
475	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
476	END_BATCH();
477	BEGIN_BATCH(3 + 2);
478	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
479	RELOC_BATCH(const_conf->bo, domain, 0);
480	END_BATCH();
481	break;
482    case SHADER_TYPE_PS:
483	BEGIN_BATCH(3);
484	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
485	END_BATCH();
486	BEGIN_BATCH(3 + 2);
487	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
488	RELOC_BATCH(const_conf->bo, domain, 0);
489	END_BATCH();
490	break;
491    default:
492	ErrorF("Unsupported const type %d\n", const_conf->type);
493	break;
494    }
495
496}
497
498void
499evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
500{
501    RADEONInfoPtr info = RADEONPTR(pScrn);
502    /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
503     * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
504     */
505    BEGIN_BATCH(3);
506    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
507    END_BATCH();
508}
509
510static void
511evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
512{
513    RADEONInfoPtr info = RADEONPTR(pScrn);
514    struct radeon_accel_state *accel_state = info->accel_state;
515    uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
516
517    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
518			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
519			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
520			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
521			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
522    if (res->clamp_x)
523	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
524
525    if (res->format_comp_all)
526	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
527
528    if (res->srf_mode_all)
529	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
530
531    sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
532			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
533			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
534			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
535
536    if (res->uncached)
537	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
538
539    /* XXX ??? */
540    sq_vtx_constant_word4 = 0;
541
542    /* flush vertex cache */
543    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
544	(info->ChipFamily == CHIP_FAMILY_PALM) ||
545	(info->ChipFamily == CHIP_FAMILY_CAICOS))
546	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
547				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
548				      res->bo,
549				      domain, 0);
550    else
551	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
552				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
553				      res->bo,
554				      domain, 0);
555
556    BEGIN_BATCH(10 + 2);
557    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
558    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
559    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
560    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
561    E32(sq_vtx_constant_word3);		// 3: swizzles
562    E32(sq_vtx_constant_word4);		// 4: num elements
563    E32(0);							// 5: n/a
564    E32(0);							// 6: n/a
565    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
566    RELOC_BATCH(res->bo, domain, 0);
567    END_BATCH();
568}
569
570void
571evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
572{
573    RADEONInfoPtr info = RADEONPTR(pScrn);
574    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
575    uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
576
577    sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
578
579    if (tex_res->w)
580	sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
581				  ((tex_res->w - 1) << TEX_WIDTH_shift));
582
583    if (tex_res->tile_type)
584	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
585
586    sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
587
588    if (tex_res->h)
589	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
590    if (tex_res->depth)
591	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
592
593    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
594			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
595			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
596			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
597			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
598			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
599			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
600			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
601			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
602			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
603			     (tex_res->base_level << BASE_LEVEL_shift));
604
605    if (tex_res->srf_mode_all)
606	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
607    if (tex_res->force_degamma)
608	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
609
610    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
611			     (tex_res->base_array << BASE_ARRAY_shift) |
612			     (tex_res->last_array << LAST_ARRAY_shift));
613
614    sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
615			     (tex_res->perf_modulation << PERF_MODULATION_shift));
616
617    if (tex_res->interlaced)
618	sq_tex_resource_word6 |= INTERLACED_bit;
619
620    sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
621			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
622
623    /* flush texture cache */
624    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
625				  tex_res->size, tex_res->base,
626				  tex_res->bo, domain, 0);
627
628    BEGIN_BATCH(10 + 4);
629    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
630    E32(sq_tex_resource_word0);
631    E32(sq_tex_resource_word1);
632    E32(((tex_res->base) >> 8));
633    E32(((tex_res->mip_base) >> 8));
634    E32(sq_tex_resource_word4);
635    E32(sq_tex_resource_word5);
636    E32(sq_tex_resource_word6);
637    E32(sq_tex_resource_word7);
638    RELOC_BATCH(tex_res->bo, domain, 0);
639    RELOC_BATCH(tex_res->mip_bo, domain, 0);
640    END_BATCH();
641}
642
643void
644evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
645{
646    RADEONInfoPtr info = RADEONPTR(pScrn);
647    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
648
649    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
650			    (s->clamp_y       << CLAMP_Y_shift)					|
651			    (s->clamp_z       << CLAMP_Z_shift)					|
652			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
653			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
654			    (s->z_filter      << Z_FILTER_shift)	|
655			    (s->mip_filter    << MIP_FILTER_shift)				|
656			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
657			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
658			    (s->chroma_key    << CHROMA_KEY_shift));
659
660    sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
661			    (s->max_lod       << MAX_LOD_shift)					|
662			    (s->perf_mip      << PERF_MIP_shift)	|
663			    (s->perf_z        << PERF_Z_shift));
664
665
666    sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
667			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
668
669    if (s->mc_coord_truncate)
670	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
671    if (s->force_degamma)
672	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
673    if (s->truncate_coord)
674	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
675    if (s->disable_cube_wrap)
676	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
677    if (s->type)
678	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
679
680    BEGIN_BATCH(5);
681    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
682    E32(sq_tex_sampler_word0);
683    E32(sq_tex_sampler_word1);
684    E32(sq_tex_sampler_word2);
685    END_BATCH();
686}
687
688//XXX deal with clip offsets in clip setup
689void
690evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
691{
692    RADEONInfoPtr info = RADEONPTR(pScrn);
693
694    BEGIN_BATCH(4);
695    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
696    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
697	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
698    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
699	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
700    END_BATCH();
701}
702
703void
704evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
705{
706    RADEONInfoPtr info = RADEONPTR(pScrn);
707
708    BEGIN_BATCH(4);
709    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
710    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
711	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
712	 WINDOW_OFFSET_DISABLE_bit));
713    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
714	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
715    END_BATCH();
716}
717
718void
719evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
720{
721    RADEONInfoPtr info = RADEONPTR(pScrn);
722
723    BEGIN_BATCH(4);
724    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
725    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
726	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
727	 WINDOW_OFFSET_DISABLE_bit));
728    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
729	 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
730    END_BATCH();
731}
732
733void
734evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
735{
736    RADEONInfoPtr info = RADEONPTR(pScrn);
737
738    BEGIN_BATCH(4);
739    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
740    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
741	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
742	 WINDOW_OFFSET_DISABLE_bit));
743    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
744	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
745    END_BATCH();
746}
747
748void
749evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
750{
751    RADEONInfoPtr info = RADEONPTR(pScrn);
752
753    BEGIN_BATCH(4);
754    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
755    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
756	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
757    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
758	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
759    END_BATCH();
760}
761
762/*
763 * Setup of default state
764 */
765
766void
767evergreen_set_default_state(ScrnInfoPtr pScrn)
768{
769    tex_resource_t tex_res;
770    shader_config_t fs_conf;
771    sq_config_t sq_conf;
772    int i;
773    RADEONInfoPtr info = RADEONPTR(pScrn);
774    struct radeon_accel_state *accel_state = info->accel_state;
775
776    if (accel_state->XInited3D)
777	return;
778
779    memset(&tex_res, 0, sizeof(tex_resource_t));
780    memset(&fs_conf, 0, sizeof(shader_config_t));
781
782    accel_state->XInited3D = TRUE;
783
784    evergreen_start_3d(pScrn);
785
786    /* SQ */
787    sq_conf.ps_prio = 0;
788    sq_conf.vs_prio = 1;
789    sq_conf.gs_prio = 2;
790    sq_conf.es_prio = 3;
791    sq_conf.hs_prio = 0;
792    sq_conf.ls_prio = 0;
793    sq_conf.cs_prio = 0;
794
795    switch (info->ChipFamily) {
796    case CHIP_FAMILY_CEDAR:
797    default:
798	sq_conf.num_ps_gprs = 93;
799	sq_conf.num_vs_gprs = 46;
800	sq_conf.num_temp_gprs = 4;
801	sq_conf.num_gs_gprs = 31;
802	sq_conf.num_es_gprs = 31;
803	sq_conf.num_hs_gprs = 23;
804	sq_conf.num_ls_gprs = 23;
805	sq_conf.num_ps_threads = 96;
806	sq_conf.num_vs_threads = 16;
807	sq_conf.num_gs_threads = 16;
808	sq_conf.num_es_threads = 16;
809	sq_conf.num_hs_threads = 16;
810	sq_conf.num_ls_threads = 16;
811	sq_conf.num_ps_stack_entries = 42;
812	sq_conf.num_vs_stack_entries = 42;
813	sq_conf.num_gs_stack_entries = 42;
814	sq_conf.num_es_stack_entries = 42;
815	sq_conf.num_hs_stack_entries = 42;
816	sq_conf.num_ls_stack_entries = 42;
817	break;
818    case CHIP_FAMILY_REDWOOD:
819	sq_conf.num_ps_gprs = 93;
820	sq_conf.num_vs_gprs = 46;
821	sq_conf.num_temp_gprs = 4;
822	sq_conf.num_gs_gprs = 31;
823	sq_conf.num_es_gprs = 31;
824	sq_conf.num_hs_gprs = 23;
825	sq_conf.num_ls_gprs = 23;
826	sq_conf.num_ps_threads = 128;
827	sq_conf.num_vs_threads = 20;
828	sq_conf.num_gs_threads = 20;
829	sq_conf.num_es_threads = 20;
830	sq_conf.num_hs_threads = 20;
831	sq_conf.num_ls_threads = 20;
832	sq_conf.num_ps_stack_entries = 42;
833	sq_conf.num_vs_stack_entries = 42;
834	sq_conf.num_gs_stack_entries = 42;
835	sq_conf.num_es_stack_entries = 42;
836	sq_conf.num_hs_stack_entries = 42;
837	sq_conf.num_ls_stack_entries = 42;
838	break;
839    case CHIP_FAMILY_JUNIPER:
840	sq_conf.num_ps_gprs = 93;
841	sq_conf.num_vs_gprs = 46;
842	sq_conf.num_temp_gprs = 4;
843	sq_conf.num_gs_gprs = 31;
844	sq_conf.num_es_gprs = 31;
845	sq_conf.num_hs_gprs = 23;
846	sq_conf.num_ls_gprs = 23;
847	sq_conf.num_ps_threads = 128;
848	sq_conf.num_vs_threads = 20;
849	sq_conf.num_gs_threads = 20;
850	sq_conf.num_es_threads = 20;
851	sq_conf.num_hs_threads = 20;
852	sq_conf.num_ls_threads = 20;
853	sq_conf.num_ps_stack_entries = 85;
854	sq_conf.num_vs_stack_entries = 85;
855	sq_conf.num_gs_stack_entries = 85;
856	sq_conf.num_es_stack_entries = 85;
857	sq_conf.num_hs_stack_entries = 85;
858	sq_conf.num_ls_stack_entries = 85;
859	break;
860    case CHIP_FAMILY_CYPRESS:
861    case CHIP_FAMILY_HEMLOCK:
862	sq_conf.num_ps_gprs = 93;
863	sq_conf.num_vs_gprs = 46;
864	sq_conf.num_temp_gprs = 4;
865	sq_conf.num_gs_gprs = 31;
866	sq_conf.num_es_gprs = 31;
867	sq_conf.num_hs_gprs = 23;
868	sq_conf.num_ls_gprs = 23;
869	sq_conf.num_ps_threads = 128;
870	sq_conf.num_vs_threads = 20;
871	sq_conf.num_gs_threads = 20;
872	sq_conf.num_es_threads = 20;
873	sq_conf.num_hs_threads = 20;
874	sq_conf.num_ls_threads = 20;
875	sq_conf.num_ps_stack_entries = 85;
876	sq_conf.num_vs_stack_entries = 85;
877	sq_conf.num_gs_stack_entries = 85;
878	sq_conf.num_es_stack_entries = 85;
879	sq_conf.num_hs_stack_entries = 85;
880	sq_conf.num_ls_stack_entries = 85;
881	break;
882    case CHIP_FAMILY_PALM:
883	sq_conf.num_ps_gprs = 93;
884	sq_conf.num_vs_gprs = 46;
885	sq_conf.num_temp_gprs = 4;
886	sq_conf.num_gs_gprs = 31;
887	sq_conf.num_es_gprs = 31;
888	sq_conf.num_hs_gprs = 23;
889	sq_conf.num_ls_gprs = 23;
890	sq_conf.num_ps_threads = 96;
891	sq_conf.num_vs_threads = 16;
892	sq_conf.num_gs_threads = 16;
893	sq_conf.num_es_threads = 16;
894	sq_conf.num_hs_threads = 16;
895	sq_conf.num_ls_threads = 16;
896	sq_conf.num_ps_stack_entries = 42;
897	sq_conf.num_vs_stack_entries = 42;
898	sq_conf.num_gs_stack_entries = 42;
899	sq_conf.num_es_stack_entries = 42;
900	sq_conf.num_hs_stack_entries = 42;
901	sq_conf.num_ls_stack_entries = 42;
902	break;
903    case CHIP_FAMILY_BARTS:
904	sq_conf.num_ps_gprs = 93;
905	sq_conf.num_vs_gprs = 46;
906	sq_conf.num_temp_gprs = 4;
907	sq_conf.num_gs_gprs = 31;
908	sq_conf.num_es_gprs = 31;
909	sq_conf.num_hs_gprs = 23;
910	sq_conf.num_ls_gprs = 23;
911	sq_conf.num_ps_threads = 128;
912	sq_conf.num_vs_threads = 20;
913	sq_conf.num_gs_threads = 20;
914	sq_conf.num_es_threads = 20;
915	sq_conf.num_hs_threads = 20;
916	sq_conf.num_ls_threads = 20;
917	sq_conf.num_ps_stack_entries = 85;
918	sq_conf.num_vs_stack_entries = 85;
919	sq_conf.num_gs_stack_entries = 85;
920	sq_conf.num_es_stack_entries = 85;
921	sq_conf.num_hs_stack_entries = 85;
922	sq_conf.num_ls_stack_entries = 85;
923	break;
924    case CHIP_FAMILY_TURKS:
925	sq_conf.num_ps_gprs = 93;
926	sq_conf.num_vs_gprs = 46;
927	sq_conf.num_temp_gprs = 4;
928	sq_conf.num_gs_gprs = 31;
929	sq_conf.num_es_gprs = 31;
930	sq_conf.num_hs_gprs = 23;
931	sq_conf.num_ls_gprs = 23;
932	sq_conf.num_ps_threads = 128;
933	sq_conf.num_vs_threads = 20;
934	sq_conf.num_gs_threads = 20;
935	sq_conf.num_es_threads = 20;
936	sq_conf.num_hs_threads = 20;
937	sq_conf.num_ls_threads = 20;
938	sq_conf.num_ps_stack_entries = 42;
939	sq_conf.num_vs_stack_entries = 42;
940	sq_conf.num_gs_stack_entries = 42;
941	sq_conf.num_es_stack_entries = 42;
942	sq_conf.num_hs_stack_entries = 42;
943	sq_conf.num_ls_stack_entries = 42;
944	break;
945    case CHIP_FAMILY_CAICOS:
946	sq_conf.num_ps_gprs = 93;
947	sq_conf.num_vs_gprs = 46;
948	sq_conf.num_temp_gprs = 4;
949	sq_conf.num_gs_gprs = 31;
950	sq_conf.num_es_gprs = 31;
951	sq_conf.num_hs_gprs = 23;
952	sq_conf.num_ls_gprs = 23;
953	sq_conf.num_ps_threads = 128;
954	sq_conf.num_vs_threads = 10;
955	sq_conf.num_gs_threads = 10;
956	sq_conf.num_es_threads = 10;
957	sq_conf.num_hs_threads = 10;
958	sq_conf.num_ls_threads = 10;
959	sq_conf.num_ps_stack_entries = 42;
960	sq_conf.num_vs_stack_entries = 42;
961	sq_conf.num_gs_stack_entries = 42;
962	sq_conf.num_es_stack_entries = 42;
963	sq_conf.num_hs_stack_entries = 42;
964	sq_conf.num_ls_stack_entries = 42;
965	break;
966    }
967
968    evergreen_sq_setup(pScrn, &sq_conf);
969
970    BEGIN_BATCH(24);
971    EREG(SQ_LDS_ALLOC_PS, 0);
972    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
973
974    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
975    E32(0);
976    E32(0);
977    E32(0);
978    E32(0);
979    E32(0);
980    E32(0);
981
982    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
983    E32(0);
984    E32(0);
985    E32(0);
986    E32(0);
987
988    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
989    E32(0);
990    E32(0);
991    END_BATCH();
992
993    /* DB */
994    BEGIN_BATCH(3 + 2);
995    EREG(DB_Z_INFO,                           0);
996    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
997    END_BATCH();
998
999    BEGIN_BATCH(3 + 2);
1000    EREG(DB_STENCIL_INFO,                     0);
1001    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1002    END_BATCH();
1003
1004    BEGIN_BATCH(3 + 2);
1005    EREG(DB_HTILE_DATA_BASE,                    0);
1006    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1007    END_BATCH();
1008
1009    BEGIN_BATCH(49);
1010    EREG(DB_DEPTH_CONTROL,                    0);
1011
1012    PACK0(PA_SC_VPORT_ZMIN_0, 2);
1013    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
1014    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
1015
1016    PACK0(DB_RENDER_CONTROL, 5);
1017    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
1018    E32(0); // DB_COUNT_CONTROL
1019    E32(0); // DB_DEPTH_VIEW
1020    E32(0x2a); // DB_RENDER_OVERRIDE
1021    E32(0); // DB_RENDER_OVERRIDE2
1022
1023    PACK0(DB_STENCIL_CLEAR, 2);
1024    E32(0); // DB_STENCIL_CLEAR
1025    E32(0); // DB_DEPTH_CLEAR
1026
1027    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
1028					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
1029					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
1030					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
1031
1032    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
1033			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1034
1035    // SX
1036    EREG(SX_MISC,               0);
1037
1038    // CB
1039    PACK0(SX_ALPHA_TEST_CONTROL, 5);
1040    E32(0); // SX_ALPHA_TEST_CONTROL
1041    E32(0x00000000); //CB_BLEND_RED
1042    E32(0x00000000); //CB_BLEND_GREEN
1043    E32(0x00000000); //CB_BLEND_BLUE
1044    E32(0x00000000); //CB_BLEND_ALPHA
1045
1046    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
1047
1048    // SC
1049    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1050					       (0 << WINDOW_Y_OFFSET_shift)));
1051    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1052    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
1053    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1054    END_BATCH();
1055
1056    /* clip boolean is set to always visible -> doesn't matter */
1057    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1058	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1059
1060    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1061	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1062
1063    BEGIN_BATCH(57);
1064    PACK0(PA_SC_MODE_CNTL_0, 2);
1065    E32(0); // PA_SC_MODE_CNTL_0
1066    E32(0); // PA_SC_MODE_CNTL_1
1067
1068    PACK0(PA_SC_LINE_CNTL, 16);
1069    E32(0); // PA_SC_LINE_CNTL
1070    E32(0); // PA_SC_AA_CONFIG
1071    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1072	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1073    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1074    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1075    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1076    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1077    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1078    E32(0);
1079    E32(0);
1080    E32(0);
1081    E32(0);
1082    E32(0);
1083    E32(0);
1084    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1085    E32(0xFFFFFFFF); // PA_SC_AA_MASK
1086
1087    // CL
1088    PACK0(PA_CL_CLIP_CNTL, 8);
1089    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1090    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1091    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1092    E32(0); // PA_CL_VS_OUT_CNTL
1093    E32(0); // PA_CL_NANINF_CNTL
1094    E32(0); // PA_SU_LINE_STIPPLE_CNTL
1095    E32(0); // PA_SU_LINE_STIPPLE_SCALE
1096    E32(0); // PA_SU_PRIM_FILTER_CNTL
1097
1098    // SU
1099    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1100    E32(0);
1101    E32(0);
1102    E32(0);
1103    E32(0);
1104    E32(0);
1105    E32(0);
1106
1107    /* src = semantic id 0; mask = semantic id 1 */
1108    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1109			   (1 << SEMANTIC_1_shift)));
1110    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1111    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1112    E32(((0    << SEMANTIC_shift)	|
1113	 (0x01 << DEFAULT_VAL_shift)));
1114    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1115    E32(((1    << SEMANTIC_shift)	|
1116	 (0x01 << DEFAULT_VAL_shift)));
1117
1118    PACK0(SPI_INPUT_Z, 8);
1119    E32(0); // SPI_INPUT_Z
1120    E32(0); // SPI_FOG_CNTL
1121    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1122    E32(0); // SPI_PS_IN_CONTROL_2
1123    E32(0);
1124    E32(0);
1125    E32(0);
1126    E32(0);
1127    END_BATCH();
1128
1129    // clear FS
1130    fs_conf.bo = accel_state->shaders_bo;
1131    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1132
1133    // VGT
1134    BEGIN_BATCH(46);
1135
1136    PACK0(VGT_MAX_VTX_INDX, 4);
1137    E32(0xffffff);
1138    E32(0);
1139    E32(0);
1140    E32(0);
1141
1142    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1143    E32(0);
1144    E32(0);
1145
1146    PACK0(VGT_REUSE_OFF, 2);
1147    E32(0);
1148    E32(0);
1149
1150    PACK0(PA_SU_POINT_SIZE, 17);
1151    E32(0); // PA_SU_POINT_SIZE
1152    E32(0); // PA_SU_POINT_MINMAX
1153    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1154    E32(0); // PA_SC_LINE_STIPPLE
1155    E32(0); // VGT_OUTPUT_PATH_CNTL
1156    E32(0); // VGT_HOS_CNTL
1157    E32(0);
1158    E32(0);
1159    E32(0);
1160    E32(0);
1161    E32(0);
1162    E32(0);
1163    E32(0);
1164    E32(0);
1165    E32(0);
1166    E32(0);
1167    E32(0); // VGT_GS_MODE
1168
1169    EREG(VGT_PRIMITIVEID_EN,                  0);
1170    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1171    EREG(VGT_SHADER_STAGES_EN,          0);
1172
1173    PACK0(VGT_STRMOUT_CONFIG, 2);
1174    E32(0);
1175    E32(0);
1176    END_BATCH();
1177}
1178
1179
1180/*
1181 * Commands
1182 */
1183
1184void
1185evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1186{
1187    RADEONInfoPtr info = RADEONPTR(pScrn);
1188
1189    BEGIN_BATCH(10);
1190    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1191    PACK3(IT_INDEX_TYPE, 1);
1192#if X_BYTE_ORDER == X_BIG_ENDIAN
1193    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1194#else
1195    E32(draw_conf->index_type);
1196#endif
1197    PACK3(IT_NUM_INSTANCES, 1);
1198    E32(draw_conf->num_instances);
1199    PACK3(IT_DRAW_INDEX_AUTO, 2);
1200    E32(draw_conf->num_indices);
1201    E32(draw_conf->vgt_draw_initiator);
1202    END_BATCH();
1203}
1204
1205void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1206{
1207    RADEONInfoPtr info = RADEONPTR(pScrn);
1208    struct radeon_accel_state *accel_state = info->accel_state;
1209    draw_config_t   draw_conf;
1210    vtx_resource_t  vtx_res;
1211
1212    if (accel_state->vbo.vb_start_op == -1)
1213      return;
1214
1215    CLEAR (draw_conf);
1216    CLEAR (vtx_res);
1217
1218    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1219	radeon_ib_discard(pScrn);
1220	radeon_cs_flush_indirect(pScrn);
1221	return;
1222    }
1223
1224    /* Vertex buffer setup */
1225    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1226    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
1227    vtx_res.vtx_size_dw     = vtx_size / 4;
1228    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1229    vtx_res.vb_addr         = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
1230    vtx_res.bo              = accel_state->vbo.vb_bo;
1231    vtx_res.dst_sel_x       = SQ_SEL_X;
1232    vtx_res.dst_sel_y       = SQ_SEL_Y;
1233    vtx_res.dst_sel_z       = SQ_SEL_Z;
1234    vtx_res.dst_sel_w       = SQ_SEL_W;
1235#if X_BYTE_ORDER == X_BIG_ENDIAN
1236    vtx_res.endian          = SQ_ENDIAN_8IN32;
1237#endif
1238    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1239
1240    /* Draw */
1241    draw_conf.prim_type          = DI_PT_RECTLIST;
1242    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1243    draw_conf.num_instances      = 1;
1244    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1245    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1246
1247    evergreen_draw_auto(pScrn, &draw_conf);
1248
1249    /* sync dst surface */
1250    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1251				  accel_state->dst_size, accel_state->dst_obj.offset,
1252				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1253
1254    accel_state->vbo.vb_start_op = -1;
1255    accel_state->cbuf.vb_start_op = -1;
1256    accel_state->ib_reset_op = 0;
1257
1258}
1259
1260#endif
1261