evergreen_accel.c revision c4ae5be6
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
30#ifdef XF86DRM_MODE
31
32#include "xf86.h"
33
34#include <errno.h>
35
36#include "radeon.h"
37#include "evergreen_shader.h"
38#include "radeon_reg.h"
39#include "evergreen_reg.h"
40#include "evergreen_state.h"
41
42#include "radeon_drm.h"
43#include "radeon_vbo.h"
44#include "radeon_exa_shared.h"
45
46static const uint32_t EVERGREEN_ROP[16] = {
47    RADEON_ROP3_ZERO, /* GXclear        */
48    RADEON_ROP3_DSa,  /* Gxand          */
49    RADEON_ROP3_SDna, /* GXandReverse   */
50    RADEON_ROP3_S,    /* GXcopy         */
51    RADEON_ROP3_DSna, /* GXandInverted  */
52    RADEON_ROP3_D,    /* GXnoop         */
53    RADEON_ROP3_DSx,  /* GXxor          */
54    RADEON_ROP3_DSo,  /* GXor           */
55    RADEON_ROP3_DSon, /* GXnor          */
56    RADEON_ROP3_DSxn, /* GXequiv        */
57    RADEON_ROP3_Dn,   /* GXinvert       */
58    RADEON_ROP3_SDno, /* GXorReverse    */
59    RADEON_ROP3_Sn,   /* GXcopyInverted */
60    RADEON_ROP3_DSno, /* GXorInverted   */
61    RADEON_ROP3_DSan, /* GXnand         */
62    RADEON_ROP3_ONE,  /* GXset          */
63};
64
65void
66evergreen_start_3d(ScrnInfoPtr pScrn)
67{
68    RADEONInfoPtr info = RADEONPTR(pScrn);
69
70    BEGIN_BATCH(3);
71    PACK3(IT_CONTEXT_CONTROL, 2);
72    E32(0x80000000);
73    E32(0x80000000);
74    END_BATCH();
75
76}
77
78/*
79 * Setup of functional groups
80 */
81
82// asic stack/thread/gpr limits - need to query the drm
83static void
84evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
85{
86    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
87    uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
88    uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
89    RADEONInfoPtr info = RADEONPTR(pScrn);
90
91    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
92	(info->ChipFamily == CHIP_FAMILY_PALM) ||
93	(info->ChipFamily == CHIP_FAMILY_CAICOS))
94	sq_config = 0;
95    else
96	sq_config = VC_ENABLE_bit;
97
98    sq_config |= (EXPORT_SRC_C_bit |
99		  (sq_conf->cs_prio << CS_PRIO_shift) |
100		  (sq_conf->ls_prio << LS_PRIO_shift) |
101		  (sq_conf->hs_prio << HS_PRIO_shift) |
102		  (sq_conf->ps_prio << PS_PRIO_shift) |
103		  (sq_conf->vs_prio << VS_PRIO_shift) |
104		  (sq_conf->gs_prio << GS_PRIO_shift) |
105		  (sq_conf->es_prio << ES_PRIO_shift));
106
107    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
108			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
109			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
110    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
111			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
112    sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
113			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
114
115    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
116			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
117			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
118			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
119    sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
120				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
121
122    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
123				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
124
125    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
126				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
127
128    sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
129				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
130
131    BEGIN_BATCH(16);
132    /* disable dyn gprs */
133    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
134    PACK0(SQ_CONFIG, 4);
135    E32(sq_config);
136    E32(sq_gpr_resource_mgmt_1);
137    E32(sq_gpr_resource_mgmt_2);
138    E32(sq_gpr_resource_mgmt_3);
139    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
140    E32(sq_thread_resource_mgmt);
141    E32(sq_thread_resource_mgmt_2);
142    E32(sq_stack_resource_mgmt_1);
143    E32(sq_stack_resource_mgmt_2);
144    E32(sq_stack_resource_mgmt_3);
145    END_BATCH();
146}
147
148/* cayman has some minor differences in CB_COLOR*_INFO and _ATTRIB, but none that
149 * we use here.
150 */
151void
152evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
153{
154    uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim;
155    int pitch, slice, h;
156    RADEONInfoPtr info = RADEONPTR(pScrn);
157
158    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
159		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
160		     (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
161		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
162		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
163		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
164		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
165    if (cb_conf->blend_clamp)
166	cb_color_info |= BLEND_CLAMP_bit;
167    if (cb_conf->fast_clear)
168	cb_color_info |= FAST_CLEAR_bit;
169    if (cb_conf->compression)
170	cb_color_info |= COMPRESSION_bit;
171    if (cb_conf->blend_bypass)
172	cb_color_info |= BLEND_BYPASS_bit;
173    if (cb_conf->simple_float)
174	cb_color_info |= SIMPLE_FLOAT_bit;
175    if (cb_conf->round_mode)
176	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
177    if (cb_conf->tile_compact)
178	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
179    if (cb_conf->rat)
180	cb_color_info |= RAT_bit;
181
182    /* bit 4 needs to be set for linear and depth/stencil surfaces */
183    if (cb_conf->non_disp_tiling)
184	cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
185
186    pitch = (cb_conf->w / 8) - 1;
187    h = RADEON_ALIGN(cb_conf->h, 8);
188    slice = ((cb_conf->w * h) / 64) - 1;
189
190    switch (cb_conf->resource_type) {
191    case BUFFER:
192	/* number of elements in the surface */
193	cb_color_dim = pitch * slice;
194	break;
195    default:
196	/* w/h of the surface */
197	cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) |
198			((cb_conf->h - 1) << HEIGHT_MAX_shift));
199	break;
200    }
201
202    BEGIN_BATCH(3 + 2);
203    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
204    RELOC_BATCH(cb_conf->bo, 0, domain);
205    END_BATCH();
206
207    /* Set CMASK & FMASK buffer to the offset of color buffer as
208     * we don't use those this shouldn't cause any issue and we
209     * then have a valid cmd stream
210     */
211    BEGIN_BATCH(3 + 2);
212    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
213    RELOC_BATCH(cb_conf->bo, 0, domain);
214    END_BATCH();
215    BEGIN_BATCH(3 + 2);
216    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
217    RELOC_BATCH(cb_conf->bo, 0, domain);
218    END_BATCH();
219
220    /* tiling config */
221    BEGIN_BATCH(3 + 2);
222    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
223    RELOC_BATCH(cb_conf->bo, 0, domain);
224    END_BATCH();
225    BEGIN_BATCH(3 + 2);
226    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
227    RELOC_BATCH(cb_conf->bo, 0, domain);
228    END_BATCH();
229
230    BEGIN_BATCH(33);
231    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
232    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
233    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
234    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
235    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
236    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
237    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
238    E32(0);
239    E32(0);
240    E32(0);
241    E32(0);
242    EREG(CB_TARGET_MASK,                      (cb_conf->pmask << TARGET0_ENABLE_shift));
243    EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[cb_conf->rop] |
244					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
245    EREG(CB_BLEND0_CONTROL,                   cb_conf->blendcntl);
246    END_BATCH();
247
248}
249
250static void
251evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
252			      uint32_t size, uint64_t mc_addr,
253			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
254{
255    RADEONInfoPtr info = RADEONPTR(pScrn);
256    uint32_t cp_coher_size;
257    if (size == 0xffffffff)
258	cp_coher_size = 0xffffffff;
259    else
260	cp_coher_size = ((size + 255) >> 8);
261
262    BEGIN_BATCH(5 + 2);
263    PACK3(IT_SURFACE_SYNC, 4);
264    E32(sync_type);
265    E32(cp_coher_size);
266    E32((mc_addr >> 8));
267    E32(10); /* poll interval */
268    RELOC_BATCH(bo, rdomains, wdomain);
269    END_BATCH();
270}
271
272/* inserts a wait for vline in the command stream */
273void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
274				  xf86CrtcPtr crtc, int start, int stop)
275{
276    RADEONInfoPtr  info = RADEONPTR(pScrn);
277    drmmode_crtc_private_ptr drmmode_crtc;
278    uint32_t offset;
279
280    if (!crtc)
281        return;
282
283    drmmode_crtc = crtc->driver_private;
284
285    if (stop < start)
286        return;
287
288    if (!crtc->enabled)
289        return;
290
291    if (info->cs) {
292        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
293	    return;
294    } else {
295#ifdef USE_EXA
296	if (info->useEXA)
297	    offset = exaGetPixmapOffset(pPix);
298	else
299#endif
300	    offset = pPix->devPrivate.ptr - info->FB;
301
302	/* if drawing to front buffer */
303	if (offset != 0)
304	    return;
305    }
306
307    start = max(start, 0);
308    stop = min(stop, crtc->mode.VDisplay);
309
310    if (start > crtc->mode.VDisplay)
311        return;
312
313    BEGIN_BATCH(11);
314    /* set the VLINE range */
315    EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
316	 (start << EVERGREEN_VLINE_START_SHIFT) |
317	 (stop << EVERGREEN_VLINE_END_SHIFT));
318
319    /* tell the CP to poll the VLINE state register */
320    PACK3(IT_WAIT_REG_MEM, 6);
321    E32(IT_WAIT_REG | IT_WAIT_EQ);
322    E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
323    E32(0);
324    E32(0);                          // Ref value
325    E32(EVERGREEN_VLINE_STAT);    // Mask
326    E32(10);                         // Wait interval
327    /* add crtc reloc */
328    PACK3(IT_NOP, 1);
329    E32(drmmode_crtc->mode_crtc->crtc_id);
330    END_BATCH();
331}
332
333void
334evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
335{
336    RADEONInfoPtr info = RADEONPTR(pScrn);
337
338    BEGIN_BATCH(8);
339    /* Interpolator setup */
340    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
341    PACK0(SPI_PS_IN_CONTROL_0, 3);
342    E32(((num_interp << NUM_INTERP_shift) |
343	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
344    E32(0); // SPI_PS_IN_CONTROL_1
345    E32(0); // SPI_INTERP_CONTROL_0
346    END_BATCH();
347}
348
349void
350evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
351{
352    RADEONInfoPtr info = RADEONPTR(pScrn);
353    uint32_t sq_pgm_resources;
354
355    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
356			(fs_conf->stack_size << STACK_SIZE_shift));
357
358    if (fs_conf->dx10_clamp)
359	sq_pgm_resources |= DX10_CLAMP_bit;
360
361    BEGIN_BATCH(3 + 2);
362    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
363    RELOC_BATCH(fs_conf->bo, domain, 0);
364    END_BATCH();
365
366    BEGIN_BATCH(3);
367    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
368    END_BATCH();
369}
370
/* cayman has some minor differences in SQ_PGM_RESOURCES_VS and _RESOURCES_2_VS,
372 * but none that we use here.
373 */
374void
375evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
376{
377    RADEONInfoPtr info = RADEONPTR(pScrn);
378    uint32_t sq_pgm_resources, sq_pgm_resources_2;
379
380    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
381			(vs_conf->stack_size << STACK_SIZE_shift));
382
383    if (vs_conf->dx10_clamp)
384	sq_pgm_resources |= DX10_CLAMP_bit;
385    if (vs_conf->uncached_first_inst)
386	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
387
388    sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
389			  (vs_conf->double_round << DOUBLE_ROUND_shift));
390
391    if (vs_conf->allow_sdi)
392	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
393    if (vs_conf->allow_sd0)
394	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
395    if (vs_conf->allow_ddi)
396	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
397    if (vs_conf->allow_ddo)
398	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
399
400    /* flush SQ cache */
401    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
402				  vs_conf->shader_size, vs_conf->shader_addr,
403				  vs_conf->bo, domain, 0);
404
405    BEGIN_BATCH(3 + 2);
406    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
407    RELOC_BATCH(vs_conf->bo, domain, 0);
408    END_BATCH();
409
410    BEGIN_BATCH(4);
411    PACK0(SQ_PGM_RESOURCES_VS, 2);
412    E32(sq_pgm_resources);
413    E32(sq_pgm_resources_2);
414    END_BATCH();
415}
416
/* cayman has some minor differences in SQ_PGM_RESOURCES_PS and _RESOURCES_2_PS,
418 * but none that we use here.
419 */
420void
421evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
422{
423    RADEONInfoPtr info = RADEONPTR(pScrn);
424    uint32_t sq_pgm_resources, sq_pgm_resources_2;
425
426    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
427			(ps_conf->stack_size << STACK_SIZE_shift));
428
429    if (ps_conf->dx10_clamp)
430	sq_pgm_resources |= DX10_CLAMP_bit;
431    if (ps_conf->uncached_first_inst)
432	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
433    if (ps_conf->clamp_consts)
434	sq_pgm_resources |= CLAMP_CONSTS_bit;
435
436    sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
437			  (ps_conf->double_round << DOUBLE_ROUND_shift));
438
439    if (ps_conf->allow_sdi)
440	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
441    if (ps_conf->allow_sd0)
442	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
443    if (ps_conf->allow_ddi)
444	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
445    if (ps_conf->allow_ddo)
446	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
447
448    /* flush SQ cache */
449    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
450				  ps_conf->shader_size, ps_conf->shader_addr,
451				  ps_conf->bo, domain, 0);
452
453    BEGIN_BATCH(3 + 2);
454    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
455    RELOC_BATCH(ps_conf->bo, domain, 0);
456    END_BATCH();
457
458    BEGIN_BATCH(5);
459    PACK0(SQ_PGM_RESOURCES_PS, 3);
460    E32(sq_pgm_resources);
461    E32(sq_pgm_resources_2);
462    E32(ps_conf->export_mode);
463    END_BATCH();
464}
465
466void
467evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
468{
469    RADEONInfoPtr info = RADEONPTR(pScrn);
470    /* size reg is units of 16 consts (4 dwords each) */
471    uint32_t size = const_conf->size_bytes >> 8;
472
473    if (size == 0)
474	size = 1;
475
476    /* flush SQ cache */
477    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
478				  const_conf->size_bytes, const_conf->const_addr,
479				  const_conf->bo, domain, 0);
480
481    switch (const_conf->type) {
482    case SHADER_TYPE_VS:
483	BEGIN_BATCH(3);
484	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
485	END_BATCH();
486	BEGIN_BATCH(3 + 2);
487	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
488	RELOC_BATCH(const_conf->bo, domain, 0);
489	END_BATCH();
490	break;
491    case SHADER_TYPE_PS:
492	BEGIN_BATCH(3);
493	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
494	END_BATCH();
495	BEGIN_BATCH(3 + 2);
496	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
497	RELOC_BATCH(const_conf->bo, domain, 0);
498	END_BATCH();
499	break;
500    default:
501	ErrorF("Unsupported const type %d\n", const_conf->type);
502	break;
503    }
504
505}
506
507void
508evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
509{
510    RADEONInfoPtr info = RADEONPTR(pScrn);
511    /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
512     * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
513     */
514    BEGIN_BATCH(3);
515    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
516    END_BATCH();
517}
518
519/* cayman has some minor differences in SQ_VTX_CONSTANT_WORD2_0 and _WORD3_0,
520 * but none that we use here.
521 */
522static void
523evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
524{
525    RADEONInfoPtr info = RADEONPTR(pScrn);
526    struct radeon_accel_state *accel_state = info->accel_state;
527    uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
528
529    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
530			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
531			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
532			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
533			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
534    if (res->clamp_x)
535	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
536
537    if (res->format_comp_all)
538	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
539
540    if (res->srf_mode_all)
541	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
542
543    sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
544			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
545			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
546			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
547
548    if (res->uncached)
549	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
550
551    /* XXX ??? */
552    sq_vtx_constant_word4 = 0;
553
554    /* flush vertex cache */
555    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
556	(info->ChipFamily == CHIP_FAMILY_PALM) ||
557	(info->ChipFamily == CHIP_FAMILY_CAICOS) ||
558	(info->ChipFamily == CHIP_FAMILY_CAYMAN))
559	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
560				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
561				      res->bo,
562				      domain, 0);
563    else
564	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
565				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
566				      res->bo,
567				      domain, 0);
568
569    BEGIN_BATCH(10 + 2);
570    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
571    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
572    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
573    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
574    E32(sq_vtx_constant_word3);		// 3: swizzles
575    E32(sq_vtx_constant_word4);		// 4: num elements
576    E32(0);							// 5: n/a
577    E32(0);							// 6: n/a
578    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
579    RELOC_BATCH(res->bo, domain, 0);
580    END_BATCH();
581}
582
583/* cayman has some minor differences in SQ_TEX_CONSTANT_WORD0_0 and _WORD4_0,
584 * but none that we use here.
585 */
586void
587evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
588{
589    RADEONInfoPtr info = RADEONPTR(pScrn);
590    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
591    uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
592
593    sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
594
595    if (tex_res->w)
596	sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
597				  ((tex_res->w - 1) << TEX_WIDTH_shift));
598
599    if (tex_res->tile_type)
600	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
601
602    sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
603
604    if (tex_res->h)
605	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
606    if (tex_res->depth)
607	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
608
609    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
610			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
611			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
612			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
613			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
614			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
615			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
616			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
617			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
618			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
619			     (tex_res->base_level << BASE_LEVEL_shift));
620
621    if (tex_res->srf_mode_all)
622	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
623    if (tex_res->force_degamma)
624	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
625
626    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
627			     (tex_res->base_array << BASE_ARRAY_shift) |
628			     (tex_res->last_array << LAST_ARRAY_shift));
629
630    sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
631			     (tex_res->perf_modulation << PERF_MODULATION_shift));
632
633    if (tex_res->interlaced)
634	sq_tex_resource_word6 |= INTERLACED_bit;
635
636    sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
637			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
638
639    /* flush texture cache */
640    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
641				  tex_res->size, tex_res->base,
642				  tex_res->bo, domain, 0);
643
644    BEGIN_BATCH(10 + 4);
645    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
646    E32(sq_tex_resource_word0);
647    E32(sq_tex_resource_word1);
648    E32(((tex_res->base) >> 8));
649    E32(((tex_res->mip_base) >> 8));
650    E32(sq_tex_resource_word4);
651    E32(sq_tex_resource_word5);
652    E32(sq_tex_resource_word6);
653    E32(sq_tex_resource_word7);
654    RELOC_BATCH(tex_res->bo, domain, 0);
655    RELOC_BATCH(tex_res->mip_bo, domain, 0);
656    END_BATCH();
657}
658
659/* cayman has some minor differences in SQ_TEX_SAMPLER_WORD0_0,
660 * but none that we use here.
661 */
662void
663evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
664{
665    RADEONInfoPtr info = RADEONPTR(pScrn);
666    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
667
668    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
669			    (s->clamp_y       << CLAMP_Y_shift)					|
670			    (s->clamp_z       << CLAMP_Z_shift)					|
671			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
672			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
673			    (s->z_filter      << Z_FILTER_shift)	|
674			    (s->mip_filter    << MIP_FILTER_shift)				|
675			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
676			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
677			    (s->chroma_key    << CHROMA_KEY_shift));
678
679    sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
680			    (s->max_lod       << MAX_LOD_shift)					|
681			    (s->perf_mip      << PERF_MIP_shift)	|
682			    (s->perf_z        << PERF_Z_shift));
683
684
685    sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
686			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
687
688    if (s->mc_coord_truncate)
689	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
690    if (s->force_degamma)
691	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
692    if (s->truncate_coord)
693	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
694    if (s->disable_cube_wrap)
695	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
696    if (s->type)
697	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
698
699    BEGIN_BATCH(5);
700    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
701    E32(sq_tex_sampler_word0);
702    E32(sq_tex_sampler_word1);
703    E32(sq_tex_sampler_word2);
704    END_BATCH();
705}
706
707/* workarounds for hw bugs in eg+ */
708/* only affects screen/window/generic/vport.  cliprects are not affected */
709static void
710evergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn, int *x1, int *y1, int *x2, int *y2)
711{
712    RADEONInfoPtr info = RADEONPTR(pScrn);
713
714    /* all eg+ asics */
715    if (*x2 == 0)
716	*x1 = 1;
717    if (*y2 == 0)
718	*y1 = 1;
719
720    /* cayman only */
721    if (info->ChipFamily == CHIP_FAMILY_CAYMAN) {
722	/* cliprects aren't affected so we can use them to clip if we need
723	 * a true 1x1 clip region
724	 */
725	if ((*x2 == 1) && (*y2 == 1))
726	    *x2 = 2;
727    }
728}
729
730//XXX deal with clip offsets in clip setup
731void
732evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
733{
734    RADEONInfoPtr info = RADEONPTR(pScrn);
735
736    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
737
738    BEGIN_BATCH(4);
739    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
740    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
741	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
742    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
743	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
744    END_BATCH();
745}
746
747void
748evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
749{
750    RADEONInfoPtr info = RADEONPTR(pScrn);
751
752    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
753
754    BEGIN_BATCH(4);
755    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
756    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
757	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
758	 WINDOW_OFFSET_DISABLE_bit));
759    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
760	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
761    END_BATCH();
762}
763
764void
765evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
766{
767    RADEONInfoPtr info = RADEONPTR(pScrn);
768
769    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
770
771    BEGIN_BATCH(4);
772    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
773    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
774	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
775	 WINDOW_OFFSET_DISABLE_bit));
776    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
777	 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
778    END_BATCH();
779}
780
781void
782evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
783{
784    RADEONInfoPtr info = RADEONPTR(pScrn);
785
786    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
787
788    BEGIN_BATCH(4);
789    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
790    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
791	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
792	 WINDOW_OFFSET_DISABLE_bit));
793    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
794	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
795    END_BATCH();
796}
797
798void
799evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
800{
801    RADEONInfoPtr info = RADEONPTR(pScrn);
802
803    BEGIN_BATCH(4);
804    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
805    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
806	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
807    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
808	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
809    END_BATCH();
810}
811
812/*
813 * Setup of default state
814 */
815
816void
817evergreen_set_default_state(ScrnInfoPtr pScrn)
818{
819    tex_resource_t tex_res;
820    shader_config_t fs_conf;
821    sq_config_t sq_conf;
822    int i;
823    RADEONInfoPtr info = RADEONPTR(pScrn);
824    struct radeon_accel_state *accel_state = info->accel_state;
825
826    if (info->ChipFamily == CHIP_FAMILY_CAYMAN) {
827	cayman_set_default_state(pScrn);
828	return;
829    }
830
831    if (accel_state->XInited3D)
832	return;
833
834    memset(&tex_res, 0, sizeof(tex_resource_t));
835    memset(&fs_conf, 0, sizeof(shader_config_t));
836
837    accel_state->XInited3D = TRUE;
838
839    evergreen_start_3d(pScrn);
840
841    /* SQ */
842    sq_conf.ps_prio = 0;
843    sq_conf.vs_prio = 1;
844    sq_conf.gs_prio = 2;
845    sq_conf.es_prio = 3;
846    sq_conf.hs_prio = 0;
847    sq_conf.ls_prio = 0;
848    sq_conf.cs_prio = 0;
849
850    switch (info->ChipFamily) {
851    case CHIP_FAMILY_CEDAR:
852    default:
853	sq_conf.num_ps_gprs = 93;
854	sq_conf.num_vs_gprs = 46;
855	sq_conf.num_temp_gprs = 4;
856	sq_conf.num_gs_gprs = 31;
857	sq_conf.num_es_gprs = 31;
858	sq_conf.num_hs_gprs = 23;
859	sq_conf.num_ls_gprs = 23;
860	sq_conf.num_ps_threads = 96;
861	sq_conf.num_vs_threads = 16;
862	sq_conf.num_gs_threads = 16;
863	sq_conf.num_es_threads = 16;
864	sq_conf.num_hs_threads = 16;
865	sq_conf.num_ls_threads = 16;
866	sq_conf.num_ps_stack_entries = 42;
867	sq_conf.num_vs_stack_entries = 42;
868	sq_conf.num_gs_stack_entries = 42;
869	sq_conf.num_es_stack_entries = 42;
870	sq_conf.num_hs_stack_entries = 42;
871	sq_conf.num_ls_stack_entries = 42;
872	break;
873    case CHIP_FAMILY_REDWOOD:
874	sq_conf.num_ps_gprs = 93;
875	sq_conf.num_vs_gprs = 46;
876	sq_conf.num_temp_gprs = 4;
877	sq_conf.num_gs_gprs = 31;
878	sq_conf.num_es_gprs = 31;
879	sq_conf.num_hs_gprs = 23;
880	sq_conf.num_ls_gprs = 23;
881	sq_conf.num_ps_threads = 128;
882	sq_conf.num_vs_threads = 20;
883	sq_conf.num_gs_threads = 20;
884	sq_conf.num_es_threads = 20;
885	sq_conf.num_hs_threads = 20;
886	sq_conf.num_ls_threads = 20;
887	sq_conf.num_ps_stack_entries = 42;
888	sq_conf.num_vs_stack_entries = 42;
889	sq_conf.num_gs_stack_entries = 42;
890	sq_conf.num_es_stack_entries = 42;
891	sq_conf.num_hs_stack_entries = 42;
892	sq_conf.num_ls_stack_entries = 42;
893	break;
894    case CHIP_FAMILY_JUNIPER:
895	sq_conf.num_ps_gprs = 93;
896	sq_conf.num_vs_gprs = 46;
897	sq_conf.num_temp_gprs = 4;
898	sq_conf.num_gs_gprs = 31;
899	sq_conf.num_es_gprs = 31;
900	sq_conf.num_hs_gprs = 23;
901	sq_conf.num_ls_gprs = 23;
902	sq_conf.num_ps_threads = 128;
903	sq_conf.num_vs_threads = 20;
904	sq_conf.num_gs_threads = 20;
905	sq_conf.num_es_threads = 20;
906	sq_conf.num_hs_threads = 20;
907	sq_conf.num_ls_threads = 20;
908	sq_conf.num_ps_stack_entries = 85;
909	sq_conf.num_vs_stack_entries = 85;
910	sq_conf.num_gs_stack_entries = 85;
911	sq_conf.num_es_stack_entries = 85;
912	sq_conf.num_hs_stack_entries = 85;
913	sq_conf.num_ls_stack_entries = 85;
914	break;
915    case CHIP_FAMILY_CYPRESS:
916    case CHIP_FAMILY_HEMLOCK:
917	sq_conf.num_ps_gprs = 93;
918	sq_conf.num_vs_gprs = 46;
919	sq_conf.num_temp_gprs = 4;
920	sq_conf.num_gs_gprs = 31;
921	sq_conf.num_es_gprs = 31;
922	sq_conf.num_hs_gprs = 23;
923	sq_conf.num_ls_gprs = 23;
924	sq_conf.num_ps_threads = 128;
925	sq_conf.num_vs_threads = 20;
926	sq_conf.num_gs_threads = 20;
927	sq_conf.num_es_threads = 20;
928	sq_conf.num_hs_threads = 20;
929	sq_conf.num_ls_threads = 20;
930	sq_conf.num_ps_stack_entries = 85;
931	sq_conf.num_vs_stack_entries = 85;
932	sq_conf.num_gs_stack_entries = 85;
933	sq_conf.num_es_stack_entries = 85;
934	sq_conf.num_hs_stack_entries = 85;
935	sq_conf.num_ls_stack_entries = 85;
936	break;
937    case CHIP_FAMILY_PALM:
938	sq_conf.num_ps_gprs = 93;
939	sq_conf.num_vs_gprs = 46;
940	sq_conf.num_temp_gprs = 4;
941	sq_conf.num_gs_gprs = 31;
942	sq_conf.num_es_gprs = 31;
943	sq_conf.num_hs_gprs = 23;
944	sq_conf.num_ls_gprs = 23;
945	sq_conf.num_ps_threads = 96;
946	sq_conf.num_vs_threads = 16;
947	sq_conf.num_gs_threads = 16;
948	sq_conf.num_es_threads = 16;
949	sq_conf.num_hs_threads = 16;
950	sq_conf.num_ls_threads = 16;
951	sq_conf.num_ps_stack_entries = 42;
952	sq_conf.num_vs_stack_entries = 42;
953	sq_conf.num_gs_stack_entries = 42;
954	sq_conf.num_es_stack_entries = 42;
955	sq_conf.num_hs_stack_entries = 42;
956	sq_conf.num_ls_stack_entries = 42;
957	break;
958    case CHIP_FAMILY_BARTS:
959	sq_conf.num_ps_gprs = 93;
960	sq_conf.num_vs_gprs = 46;
961	sq_conf.num_temp_gprs = 4;
962	sq_conf.num_gs_gprs = 31;
963	sq_conf.num_es_gprs = 31;
964	sq_conf.num_hs_gprs = 23;
965	sq_conf.num_ls_gprs = 23;
966	sq_conf.num_ps_threads = 128;
967	sq_conf.num_vs_threads = 20;
968	sq_conf.num_gs_threads = 20;
969	sq_conf.num_es_threads = 20;
970	sq_conf.num_hs_threads = 20;
971	sq_conf.num_ls_threads = 20;
972	sq_conf.num_ps_stack_entries = 85;
973	sq_conf.num_vs_stack_entries = 85;
974	sq_conf.num_gs_stack_entries = 85;
975	sq_conf.num_es_stack_entries = 85;
976	sq_conf.num_hs_stack_entries = 85;
977	sq_conf.num_ls_stack_entries = 85;
978	break;
979    case CHIP_FAMILY_TURKS:
980	sq_conf.num_ps_gprs = 93;
981	sq_conf.num_vs_gprs = 46;
982	sq_conf.num_temp_gprs = 4;
983	sq_conf.num_gs_gprs = 31;
984	sq_conf.num_es_gprs = 31;
985	sq_conf.num_hs_gprs = 23;
986	sq_conf.num_ls_gprs = 23;
987	sq_conf.num_ps_threads = 128;
988	sq_conf.num_vs_threads = 20;
989	sq_conf.num_gs_threads = 20;
990	sq_conf.num_es_threads = 20;
991	sq_conf.num_hs_threads = 20;
992	sq_conf.num_ls_threads = 20;
993	sq_conf.num_ps_stack_entries = 42;
994	sq_conf.num_vs_stack_entries = 42;
995	sq_conf.num_gs_stack_entries = 42;
996	sq_conf.num_es_stack_entries = 42;
997	sq_conf.num_hs_stack_entries = 42;
998	sq_conf.num_ls_stack_entries = 42;
999	break;
1000    case CHIP_FAMILY_CAICOS:
1001	sq_conf.num_ps_gprs = 93;
1002	sq_conf.num_vs_gprs = 46;
1003	sq_conf.num_temp_gprs = 4;
1004	sq_conf.num_gs_gprs = 31;
1005	sq_conf.num_es_gprs = 31;
1006	sq_conf.num_hs_gprs = 23;
1007	sq_conf.num_ls_gprs = 23;
1008	sq_conf.num_ps_threads = 128;
1009	sq_conf.num_vs_threads = 10;
1010	sq_conf.num_gs_threads = 10;
1011	sq_conf.num_es_threads = 10;
1012	sq_conf.num_hs_threads = 10;
1013	sq_conf.num_ls_threads = 10;
1014	sq_conf.num_ps_stack_entries = 42;
1015	sq_conf.num_vs_stack_entries = 42;
1016	sq_conf.num_gs_stack_entries = 42;
1017	sq_conf.num_es_stack_entries = 42;
1018	sq_conf.num_hs_stack_entries = 42;
1019	sq_conf.num_ls_stack_entries = 42;
1020	break;
1021    }
1022
1023    evergreen_sq_setup(pScrn, &sq_conf);
1024
1025    BEGIN_BATCH(24);
1026    EREG(SQ_LDS_ALLOC_PS, 0);
1027    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
1028
1029    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
1030    E32(0);
1031    E32(0);
1032    E32(0);
1033    E32(0);
1034    E32(0);
1035    E32(0);
1036
1037    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
1038    E32(0);
1039    E32(0);
1040    E32(0);
1041    E32(0);
1042
1043    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
1044    E32(0);
1045    E32(0);
1046    END_BATCH();
1047
1048    /* DB */
1049    BEGIN_BATCH(3 + 2);
1050    EREG(DB_Z_INFO,                           0);
1051    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1052    END_BATCH();
1053
1054    BEGIN_BATCH(3 + 2);
1055    EREG(DB_STENCIL_INFO,                     0);
1056    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1057    END_BATCH();
1058
1059    BEGIN_BATCH(3 + 2);
1060    EREG(DB_HTILE_DATA_BASE,                    0);
1061    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1062    END_BATCH();
1063
1064    BEGIN_BATCH(49);
1065    EREG(DB_DEPTH_CONTROL,                    0);
1066
1067    PACK0(PA_SC_VPORT_ZMIN_0, 2);
1068    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
1069    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
1070
1071    PACK0(DB_RENDER_CONTROL, 5);
1072    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
1073    E32(0); // DB_COUNT_CONTROL
1074    E32(0); // DB_DEPTH_VIEW
1075    E32(0x2a); // DB_RENDER_OVERRIDE
1076    E32(0); // DB_RENDER_OVERRIDE2
1077
1078    PACK0(DB_STENCIL_CLEAR, 2);
1079    E32(0); // DB_STENCIL_CLEAR
1080    E32(0); // DB_DEPTH_CLEAR
1081
1082    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
1083					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
1084					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
1085					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
1086
1087    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
1088			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1089
1090    // SX
1091    EREG(SX_MISC,               0);
1092
1093    // CB
1094    PACK0(SX_ALPHA_TEST_CONTROL, 5);
1095    E32(0); // SX_ALPHA_TEST_CONTROL
1096    E32(0x00000000); //CB_BLEND_RED
1097    E32(0x00000000); //CB_BLEND_GREEN
1098    E32(0x00000000); //CB_BLEND_BLUE
1099    E32(0x00000000); //CB_BLEND_ALPHA
1100
1101    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
1102
1103    // SC
1104    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1105					       (0 << WINDOW_Y_OFFSET_shift)));
1106    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1107    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
1108    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1109    END_BATCH();
1110
1111    /* clip boolean is set to always visible -> doesn't matter */
1112    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1113	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1114
1115    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1116	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1117
1118    BEGIN_BATCH(57);
1119    PACK0(PA_SC_MODE_CNTL_0, 2);
1120    E32(0); // PA_SC_MODE_CNTL_0
1121    E32(0); // PA_SC_MODE_CNTL_1
1122
1123    PACK0(PA_SC_LINE_CNTL, 16);
1124    E32(0); // PA_SC_LINE_CNTL
1125    E32(0); // PA_SC_AA_CONFIG
1126    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1127	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1128    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1129    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1130    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1131    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1132    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1133    E32(0);
1134    E32(0);
1135    E32(0);
1136    E32(0);
1137    E32(0);
1138    E32(0);
1139    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1140    E32(0xFFFFFFFF); // PA_SC_AA_MASK
1141
1142    // CL
1143    PACK0(PA_CL_CLIP_CNTL, 8);
1144    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1145    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1146    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1147    E32(0); // PA_CL_VS_OUT_CNTL
1148    E32(0); // PA_CL_NANINF_CNTL
1149    E32(0); // PA_SU_LINE_STIPPLE_CNTL
1150    E32(0); // PA_SU_LINE_STIPPLE_SCALE
1151    E32(0); // PA_SU_PRIM_FILTER_CNTL
1152
1153    // SU
1154    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1155    E32(0);
1156    E32(0);
1157    E32(0);
1158    E32(0);
1159    E32(0);
1160    E32(0);
1161
1162    /* src = semantic id 0; mask = semantic id 1 */
1163    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1164			   (1 << SEMANTIC_1_shift)));
1165    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1166    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1167    E32(((0    << SEMANTIC_shift)	|
1168	 (0x01 << DEFAULT_VAL_shift)));
1169    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1170    E32(((1    << SEMANTIC_shift)	|
1171	 (0x01 << DEFAULT_VAL_shift)));
1172
1173    PACK0(SPI_INPUT_Z, 8);
1174    E32(0); // SPI_INPUT_Z
1175    E32(0); // SPI_FOG_CNTL
1176    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1177    E32(0); // SPI_PS_IN_CONTROL_2
1178    E32(0);
1179    E32(0);
1180    E32(0);
1181    E32(0);
1182    END_BATCH();
1183
1184    // clear FS
1185    fs_conf.bo = accel_state->shaders_bo;
1186    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1187
1188    // VGT
1189    BEGIN_BATCH(46);
1190
1191    PACK0(VGT_MAX_VTX_INDX, 4);
1192    E32(0xffffff);
1193    E32(0);
1194    E32(0);
1195    E32(0);
1196
1197    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1198    E32(0);
1199    E32(0);
1200
1201    PACK0(VGT_REUSE_OFF, 2);
1202    E32(0);
1203    E32(0);
1204
1205    PACK0(PA_SU_POINT_SIZE, 17);
1206    E32(0); // PA_SU_POINT_SIZE
1207    E32(0); // PA_SU_POINT_MINMAX
1208    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1209    E32(0); // PA_SC_LINE_STIPPLE
1210    E32(0); // VGT_OUTPUT_PATH_CNTL
1211    E32(0); // VGT_HOS_CNTL
1212    E32(0);
1213    E32(0);
1214    E32(0);
1215    E32(0);
1216    E32(0);
1217    E32(0);
1218    E32(0);
1219    E32(0);
1220    E32(0);
1221    E32(0);
1222    E32(0); // VGT_GS_MODE
1223
1224    EREG(VGT_PRIMITIVEID_EN,                  0);
1225    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1226    EREG(VGT_SHADER_STAGES_EN,          0);
1227
1228    PACK0(VGT_STRMOUT_CONFIG, 2);
1229    E32(0);
1230    E32(0);
1231    END_BATCH();
1232}
1233
1234
1235/*
1236 * Commands
1237 */
1238
1239void
1240evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1241{
1242    RADEONInfoPtr info = RADEONPTR(pScrn);
1243
1244    BEGIN_BATCH(10);
1245    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1246    PACK3(IT_INDEX_TYPE, 1);
1247#if X_BYTE_ORDER == X_BIG_ENDIAN
1248    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1249#else
1250    E32(draw_conf->index_type);
1251#endif
1252    PACK3(IT_NUM_INSTANCES, 1);
1253    E32(draw_conf->num_instances);
1254    PACK3(IT_DRAW_INDEX_AUTO, 2);
1255    E32(draw_conf->num_indices);
1256    E32(draw_conf->vgt_draw_initiator);
1257    END_BATCH();
1258}
1259
1260void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1261{
1262    RADEONInfoPtr info = RADEONPTR(pScrn);
1263    struct radeon_accel_state *accel_state = info->accel_state;
1264    draw_config_t   draw_conf;
1265    vtx_resource_t  vtx_res;
1266
1267    if (accel_state->vbo.vb_start_op == -1)
1268      return;
1269
1270    CLEAR (draw_conf);
1271    CLEAR (vtx_res);
1272
1273    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1274	radeon_ib_discard(pScrn);
1275	radeon_cs_flush_indirect(pScrn);
1276	return;
1277    }
1278
1279    /* Vertex buffer setup */
1280    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1281    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
1282    vtx_res.vtx_size_dw     = vtx_size / 4;
1283    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1284    vtx_res.vb_addr         = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
1285    vtx_res.bo              = accel_state->vbo.vb_bo;
1286    vtx_res.dst_sel_x       = SQ_SEL_X;
1287    vtx_res.dst_sel_y       = SQ_SEL_Y;
1288    vtx_res.dst_sel_z       = SQ_SEL_Z;
1289    vtx_res.dst_sel_w       = SQ_SEL_W;
1290#if X_BYTE_ORDER == X_BIG_ENDIAN
1291    vtx_res.endian          = SQ_ENDIAN_8IN32;
1292#endif
1293    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1294
1295    /* Draw */
1296    draw_conf.prim_type          = DI_PT_RECTLIST;
1297    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1298    draw_conf.num_instances      = 1;
1299    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1300    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1301
1302    evergreen_draw_auto(pScrn, &draw_conf);
1303
1304    /* sync dst surface */
1305    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1306				  accel_state->dst_size, accel_state->dst_obj.offset,
1307				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1308
1309    accel_state->vbo.vb_start_op = -1;
1310    accel_state->cbuf.vb_start_op = -1;
1311    accel_state->ib_reset_op = 0;
1312
1313}
1314
1315#endif
1316