evergreen_accel.c revision 40732134
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
30#ifdef XF86DRM_MODE
31
32#include "xf86.h"
33
34#include <errno.h>
35
36#include "radeon.h"
37#include "evergreen_shader.h"
38#include "radeon_reg.h"
39#include "evergreen_reg.h"
40#include "evergreen_state.h"
41
42#include "radeon_drm.h"
43#include "radeon_vbo.h"
44#include "radeon_exa_shared.h"
45
46static const uint32_t EVERGREEN_ROP[16] = {
47    RADEON_ROP3_ZERO, /* GXclear        */
48    RADEON_ROP3_DSa,  /* Gxand          */
49    RADEON_ROP3_SDna, /* GXandReverse   */
50    RADEON_ROP3_S,    /* GXcopy         */
51    RADEON_ROP3_DSna, /* GXandInverted  */
52    RADEON_ROP3_D,    /* GXnoop         */
53    RADEON_ROP3_DSx,  /* GXxor          */
54    RADEON_ROP3_DSo,  /* GXor           */
55    RADEON_ROP3_DSon, /* GXnor          */
56    RADEON_ROP3_DSxn, /* GXequiv        */
57    RADEON_ROP3_Dn,   /* GXinvert       */
58    RADEON_ROP3_SDno, /* GXorReverse    */
59    RADEON_ROP3_Sn,   /* GXcopyInverted */
60    RADEON_ROP3_DSno, /* GXorInverted   */
61    RADEON_ROP3_DSan, /* GXnand         */
62    RADEON_ROP3_ONE,  /* GXset          */
63};
64
65void
66evergreen_start_3d(ScrnInfoPtr pScrn)
67{
68    RADEONInfoPtr info = RADEONPTR(pScrn);
69
70    BEGIN_BATCH(3);
71    PACK3(IT_CONTEXT_CONTROL, 2);
72    E32(0x80000000);
73    E32(0x80000000);
74    END_BATCH();
75
76}
77
/*
 * Convert a tile split size into its register field encoding:
 * 64 -> 0, 128 -> 1, 256 -> 2, 512 -> 3, 1024 -> 4, 2048 -> 5.
 * 4096 and every unrecognised value encode as 6.
 */
unsigned eg_tile_split(unsigned tile_split)
{
	unsigned code;

	/* codes 0..5 correspond to 64 << code */
	for (code = 0; code < 6; code++) {
		if (tile_split == (64u << code))
			return code;
	}
	return 6;
}
92
/*
 * Convert a macro tile aspect ratio into its register field encoding:
 * 2 -> 1, 4 -> 2, 8 -> 3.  1 and every unrecognised value encode as 0.
 */
static unsigned eg_macro_tile_aspect(unsigned macro_tile_aspect)
{
	if (macro_tile_aspect == 2)
		return 1;
	if (macro_tile_aspect == 4)
		return 2;
	if (macro_tile_aspect == 8)
		return 3;
	return 0;
}
104
/*
 * Convert a bank width or bank height into its register field encoding:
 * 2 -> 1, 4 -> 2, 8 -> 3.  1 and every unrecognised value encode as 0.
 */
static unsigned eg_bank_wh(unsigned bankwh)
{
	/* entries not listed are zero-initialised, i.e. encode as 0 */
	static const unsigned char code_for[9] = { [2] = 1, [4] = 2, [8] = 3 };

	return (bankwh < 9) ? code_for[bankwh] : 0;
}
116
/*
 * Convert a bank count into its register field encoding:
 * 4 -> 1, 8 -> 2, 16 -> 3.  2 and every unrecognised value encode as 0.
 */
static unsigned eg_nbanks(unsigned nbanks)
{
	switch (nbanks) {
	case 4:
		return 1;
	case 8:
		return 2;
	case 16:
		return 3;
	default:	/* 2 banks, or anything unrecognised */
		return 0;
	}
}
128
129/*
130 * Setup of functional groups
131 */
132
133// asic stack/thread/gpr limits - need to query the drm
134static void
135evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
136{
137    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
138    uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
139    uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
140    RADEONInfoPtr info = RADEONPTR(pScrn);
141
142    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
143	(info->ChipFamily == CHIP_FAMILY_PALM) ||
144	(info->ChipFamily == CHIP_FAMILY_SUMO) ||
145	(info->ChipFamily == CHIP_FAMILY_SUMO2) ||
146	(info->ChipFamily == CHIP_FAMILY_CAICOS))
147	sq_config = 0;
148    else
149	sq_config = VC_ENABLE_bit;
150
151    sq_config |= (EXPORT_SRC_C_bit |
152		  (sq_conf->cs_prio << CS_PRIO_shift) |
153		  (sq_conf->ls_prio << LS_PRIO_shift) |
154		  (sq_conf->hs_prio << HS_PRIO_shift) |
155		  (sq_conf->ps_prio << PS_PRIO_shift) |
156		  (sq_conf->vs_prio << VS_PRIO_shift) |
157		  (sq_conf->gs_prio << GS_PRIO_shift) |
158		  (sq_conf->es_prio << ES_PRIO_shift));
159
160    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
161			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
162			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
163    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
164			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
165    sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
166			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
167
168    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
169			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
170			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
171			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
172    sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
173				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
174
175    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
176				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
177
178    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
179				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
180
181    sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
182				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
183
184    BEGIN_BATCH(16);
185    /* disable dyn gprs */
186    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
187    PACK0(SQ_CONFIG, 4);
188    E32(sq_config);
189    E32(sq_gpr_resource_mgmt_1);
190    E32(sq_gpr_resource_mgmt_2);
191    E32(sq_gpr_resource_mgmt_3);
192    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
193    E32(sq_thread_resource_mgmt);
194    E32(sq_thread_resource_mgmt_2);
195    E32(sq_stack_resource_mgmt_1);
196    E32(sq_stack_resource_mgmt_2);
197    E32(sq_stack_resource_mgmt_3);
198    END_BATCH();
199}
200
201/* cayman has some minor differences in CB_COLOR*_INFO and _ATTRIB, but none that
202 * we use here.
203 */
204void
205evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
206{
207    uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim;
208    unsigned pitch, slice, w, h, array_mode, nbanks;
209    uint32_t tile_split, macro_aspect, bankw, bankh;
210    RADEONInfoPtr info = RADEONPTR(pScrn);
211
212#if defined(XF86DRM_MODE)
213    if (cb_conf->surface) {
214	switch (cb_conf->surface->level[0].mode) {
215	case RADEON_SURF_MODE_1D:
216		array_mode = 2;
217		break;
218	case RADEON_SURF_MODE_2D:
219		array_mode = 4;
220		break;
221	default:
222		array_mode = 0;
223		break;
224	}
225	w = cb_conf->surface->level[0].npix_x;
226	h = cb_conf->surface->level[0].npix_y;
227	pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
228	slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
229	tile_split = cb_conf->surface->tile_split;
230	macro_aspect = cb_conf->surface->mtilea;
231	bankw = cb_conf->surface->bankw;
232	bankh = cb_conf->surface->bankh;
233	tile_split = eg_tile_split(tile_split);
234	macro_aspect = eg_macro_tile_aspect(macro_aspect);
235	bankw = eg_bank_wh(bankw);
236	bankh = eg_bank_wh(bankh);
237    } else
238#endif
239    {
240	pitch = (cb_conf->w / 8) - 1;
241	h = RADEON_ALIGN(cb_conf->h, 8);
242	slice = ((cb_conf->w * h) / 64) - 1;
243	array_mode = cb_conf->array_mode;
244	w = cb_conf->w;
245	tile_split = 4;
246	macro_aspect = 0;
247	bankw = 0;
248	bankh = 0;
249    }
250    nbanks = info->num_banks;
251    nbanks = eg_nbanks(nbanks);
252
253    cb_color_attrib |= (tile_split << CB_COLOR0_ATTRIB__TILE_SPLIT_shift)|
254		       (nbanks << CB_COLOR0_ATTRIB__NUM_BANKS_shift) |
255		       (bankw << CB_COLOR0_ATTRIB__BANK_WIDTH_shift) |
256		       (bankh << CB_COLOR0_ATTRIB__BANK_HEIGHT_shift) |
257		       (macro_aspect << CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift);
258    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
259		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
260		     (array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
261		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
262		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
263		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
264		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
265    if (cb_conf->blend_clamp)
266	cb_color_info |= BLEND_CLAMP_bit;
267    if (cb_conf->fast_clear)
268	cb_color_info |= FAST_CLEAR_bit;
269    if (cb_conf->compression)
270	cb_color_info |= COMPRESSION_bit;
271    if (cb_conf->blend_bypass)
272	cb_color_info |= BLEND_BYPASS_bit;
273    if (cb_conf->simple_float)
274	cb_color_info |= SIMPLE_FLOAT_bit;
275    if (cb_conf->round_mode)
276	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
277    if (cb_conf->tile_compact)
278	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
279    if (cb_conf->rat)
280	cb_color_info |= RAT_bit;
281
282    /* bit 4 needs to be set for linear and depth/stencil surfaces */
283    if (cb_conf->non_disp_tiling)
284	cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
285
286    switch (cb_conf->resource_type) {
287    case BUFFER:
288	/* number of elements in the surface */
289	cb_color_dim = pitch * slice;
290	break;
291    default:
292	/* w/h of the surface */
293	cb_color_dim = (((w - 1) << WIDTH_MAX_shift) |
294			((cb_conf->h - 1) << HEIGHT_MAX_shift));
295	break;
296    }
297
298    BEGIN_BATCH(3 + 2);
299    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
300    RELOC_BATCH(cb_conf->bo, 0, domain);
301    END_BATCH();
302
303    /* Set CMASK & FMASK buffer to the offset of color buffer as
304     * we don't use those this shouldn't cause any issue and we
305     * then have a valid cmd stream
306     */
307    BEGIN_BATCH(3 + 2);
308    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
309    RELOC_BATCH(cb_conf->bo, 0, domain);
310    END_BATCH();
311    BEGIN_BATCH(3 + 2);
312    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
313    RELOC_BATCH(cb_conf->bo, 0, domain);
314    END_BATCH();
315
316    /* tiling config */
317    BEGIN_BATCH(3 + 2);
318    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
319    RELOC_BATCH(cb_conf->bo, 0, domain);
320    END_BATCH();
321    BEGIN_BATCH(3 + 2);
322    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
323    RELOC_BATCH(cb_conf->bo, 0, domain);
324    END_BATCH();
325
326    BEGIN_BATCH(33);
327    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
328    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
329    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
330    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
331    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
332    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
333    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
334    E32(0);
335    E32(0);
336    E32(0);
337    E32(0);
338    EREG(CB_TARGET_MASK,                      (cb_conf->pmask << TARGET0_ENABLE_shift));
339    EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[cb_conf->rop] |
340					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
341    EREG(CB_BLEND0_CONTROL,                   cb_conf->blendcntl);
342    END_BATCH();
343
344}
345
346static void
347evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
348			      uint32_t size, uint64_t mc_addr,
349			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
350{
351    RADEONInfoPtr info = RADEONPTR(pScrn);
352    uint32_t cp_coher_size;
353    if (size == 0xffffffff)
354	cp_coher_size = 0xffffffff;
355    else
356	cp_coher_size = ((size + 255) >> 8);
357
358    BEGIN_BATCH(5 + 2);
359    PACK3(IT_SURFACE_SYNC, 4);
360    E32(sync_type);
361    E32(cp_coher_size);
362    E32((mc_addr >> 8));
363    E32(10); /* poll interval */
364    RELOC_BATCH(bo, rdomains, wdomain);
365    END_BATCH();
366}
367
368/* inserts a wait for vline in the command stream */
369void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
370				  xf86CrtcPtr crtc, int start, int stop)
371{
372    RADEONInfoPtr  info = RADEONPTR(pScrn);
373    drmmode_crtc_private_ptr drmmode_crtc;
374    uint32_t offset;
375
376    if (!crtc)
377        return;
378
379    drmmode_crtc = crtc->driver_private;
380
381    if (!crtc->enabled)
382        return;
383
384    if (info->cs) {
385        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
386	    return;
387    } else {
388#ifdef USE_EXA
389	if (info->useEXA)
390	    offset = exaGetPixmapOffset(pPix);
391	else
392#endif
393	    offset = pPix->devPrivate.ptr - info->FB;
394
395	/* if drawing to front buffer */
396	if (offset != 0)
397	    return;
398    }
399
400    start = max(start, crtc->y);
401    stop = min(stop, crtc->y + crtc->mode.VDisplay);
402
403    if (start >= stop)
404        return;
405
406    BEGIN_BATCH(11);
407    /* set the VLINE range */
408    EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
409	 (start << EVERGREEN_VLINE_START_SHIFT) |
410	 (stop << EVERGREEN_VLINE_END_SHIFT));
411
412    /* tell the CP to poll the VLINE state register */
413    PACK3(IT_WAIT_REG_MEM, 6);
414    E32(IT_WAIT_REG | IT_WAIT_EQ);
415    E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
416    E32(0);
417    E32(0);                          // Ref value
418    E32(EVERGREEN_VLINE_STAT);    // Mask
419    E32(10);                         // Wait interval
420    /* add crtc reloc */
421    PACK3(IT_NOP, 1);
422    E32(drmmode_crtc->mode_crtc->crtc_id);
423    END_BATCH();
424}
425
426void
427evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
428{
429    RADEONInfoPtr info = RADEONPTR(pScrn);
430
431    BEGIN_BATCH(8);
432    /* Interpolator setup */
433    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
434    PACK0(SPI_PS_IN_CONTROL_0, 3);
435    E32(((num_interp << NUM_INTERP_shift) |
436	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
437    E32(0); // SPI_PS_IN_CONTROL_1
438    E32(0); // SPI_INTERP_CONTROL_0
439    END_BATCH();
440}
441
442void
443evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
444{
445    RADEONInfoPtr info = RADEONPTR(pScrn);
446    uint32_t sq_pgm_resources;
447
448    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
449			(fs_conf->stack_size << STACK_SIZE_shift));
450
451    if (fs_conf->dx10_clamp)
452	sq_pgm_resources |= DX10_CLAMP_bit;
453
454    BEGIN_BATCH(3 + 2);
455    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
456    RELOC_BATCH(fs_conf->bo, domain, 0);
457    END_BATCH();
458
459    BEGIN_BATCH(3);
460    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
461    END_BATCH();
462}
463
464/* cayman has some minor differences in SQ_PGM_RESOUCES_VS and _RESOURCES_2_VS,
465 * but none that we use here.
466 */
467void
468evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
469{
470    RADEONInfoPtr info = RADEONPTR(pScrn);
471    uint32_t sq_pgm_resources, sq_pgm_resources_2;
472
473    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
474			(vs_conf->stack_size << STACK_SIZE_shift));
475
476    if (vs_conf->dx10_clamp)
477	sq_pgm_resources |= DX10_CLAMP_bit;
478    if (vs_conf->uncached_first_inst)
479	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
480
481    sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
482			  (vs_conf->double_round << DOUBLE_ROUND_shift));
483
484    if (vs_conf->allow_sdi)
485	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
486    if (vs_conf->allow_sd0)
487	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
488    if (vs_conf->allow_ddi)
489	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
490    if (vs_conf->allow_ddo)
491	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
492
493    /* flush SQ cache */
494    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
495				  vs_conf->shader_size, vs_conf->shader_addr,
496				  vs_conf->bo, domain, 0);
497
498    BEGIN_BATCH(3 + 2);
499    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
500    RELOC_BATCH(vs_conf->bo, domain, 0);
501    END_BATCH();
502
503    BEGIN_BATCH(4);
504    PACK0(SQ_PGM_RESOURCES_VS, 2);
505    E32(sq_pgm_resources);
506    E32(sq_pgm_resources_2);
507    END_BATCH();
508}
509
510/* cayman has some minor differences in SQ_PGM_RESOUCES_PS and _RESOURCES_2_PS,
511 * but none that we use here.
512 */
513void
514evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
515{
516    RADEONInfoPtr info = RADEONPTR(pScrn);
517    uint32_t sq_pgm_resources, sq_pgm_resources_2;
518
519    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
520			(ps_conf->stack_size << STACK_SIZE_shift));
521
522    if (ps_conf->dx10_clamp)
523	sq_pgm_resources |= DX10_CLAMP_bit;
524    if (ps_conf->uncached_first_inst)
525	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
526    if (ps_conf->clamp_consts)
527	sq_pgm_resources |= CLAMP_CONSTS_bit;
528
529    sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
530			  (ps_conf->double_round << DOUBLE_ROUND_shift));
531
532    if (ps_conf->allow_sdi)
533	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
534    if (ps_conf->allow_sd0)
535	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
536    if (ps_conf->allow_ddi)
537	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
538    if (ps_conf->allow_ddo)
539	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
540
541    /* flush SQ cache */
542    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
543				  ps_conf->shader_size, ps_conf->shader_addr,
544				  ps_conf->bo, domain, 0);
545
546    BEGIN_BATCH(3 + 2);
547    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
548    RELOC_BATCH(ps_conf->bo, domain, 0);
549    END_BATCH();
550
551    BEGIN_BATCH(5);
552    PACK0(SQ_PGM_RESOURCES_PS, 3);
553    E32(sq_pgm_resources);
554    E32(sq_pgm_resources_2);
555    E32(ps_conf->export_mode);
556    END_BATCH();
557}
558
559void
560evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
561{
562    RADEONInfoPtr info = RADEONPTR(pScrn);
563    /* size reg is units of 16 consts (4 dwords each) */
564    uint32_t size = const_conf->size_bytes >> 8;
565
566    if (size == 0)
567	size = 1;
568
569#if X_BYTE_ORDER == X_BIG_ENDIAN
570    {
571	    uint32_t count = size << 4, *p = const_conf->cpu_ptr;
572
573	    while(count--) {
574		    *p = cpu_to_le32(*p);
575		    p++;
576	    }
577    }
578#endif
579
580    /* flush SQ cache */
581    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
582				  const_conf->size_bytes, const_conf->const_addr,
583				  const_conf->bo, domain, 0);
584
585    switch (const_conf->type) {
586    case SHADER_TYPE_VS:
587	BEGIN_BATCH(3);
588	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
589	END_BATCH();
590	BEGIN_BATCH(3 + 2);
591	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
592	RELOC_BATCH(const_conf->bo, domain, 0);
593	END_BATCH();
594	break;
595    case SHADER_TYPE_PS:
596	BEGIN_BATCH(3);
597	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
598	END_BATCH();
599	BEGIN_BATCH(3 + 2);
600	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
601	RELOC_BATCH(const_conf->bo, domain, 0);
602	END_BATCH();
603	break;
604    default:
605	ErrorF("Unsupported const type %d\n", const_conf->type);
606	break;
607    }
608
609}
610
611void
612evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
613{
614    RADEONInfoPtr info = RADEONPTR(pScrn);
615    /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
616     * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
617     */
618    BEGIN_BATCH(3);
619    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
620    END_BATCH();
621}
622
623/* cayman has some minor differences in SQ_VTX_CONSTANT_WORD2_0 and _WORD3_0,
624 * but none that we use here.
625 */
626static void
627evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
628{
629    RADEONInfoPtr info = RADEONPTR(pScrn);
630    struct radeon_accel_state *accel_state = info->accel_state;
631    uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
632
633    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
634			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
635			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
636			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
637			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
638    if (res->clamp_x)
639	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
640
641    if (res->format_comp_all)
642	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
643
644    if (res->srf_mode_all)
645	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
646
647    sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
648			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
649			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
650			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
651
652    if (res->uncached)
653	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
654
655    /* XXX ??? */
656    sq_vtx_constant_word4 = 0;
657
658    /* flush vertex cache */
659    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
660	(info->ChipFamily == CHIP_FAMILY_PALM) ||
661	(info->ChipFamily == CHIP_FAMILY_SUMO) ||
662	(info->ChipFamily == CHIP_FAMILY_SUMO2) ||
663	(info->ChipFamily == CHIP_FAMILY_CAICOS) ||
664	(info->ChipFamily == CHIP_FAMILY_CAYMAN) ||
665	(info->ChipFamily == CHIP_FAMILY_ARUBA))
666	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
667				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
668				      res->bo,
669				      domain, 0);
670    else
671	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
672				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
673				      res->bo,
674				      domain, 0);
675
676    BEGIN_BATCH(10 + 2);
677    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
678    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
679    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
680    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
681    E32(sq_vtx_constant_word3);		// 3: swizzles
682    E32(sq_vtx_constant_word4);		// 4: num elements
683    E32(0);							// 5: n/a
684    E32(0);							// 6: n/a
685    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
686    RELOC_BATCH(res->bo, domain, 0);
687    END_BATCH();
688}
689
690/* cayman has some minor differences in SQ_TEX_CONSTANT_WORD0_0 and _WORD4_0,
691 * but none that we use here.
692 */
693void
694evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
695{
696    RADEONInfoPtr info = RADEONPTR(pScrn);
697    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
698    uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
699    uint32_t array_mode, pitch, tile_split, macro_aspect, bankw, bankh, nbanks;
700
701#if defined(XF86DRM_MODE)
702    if (tex_res->surface) {
703	switch (tex_res->surface->level[0].mode) {
704	case RADEON_SURF_MODE_1D:
705		array_mode = 2;
706		break;
707	case RADEON_SURF_MODE_2D:
708		array_mode = 4;
709		break;
710	default:
711		array_mode = 0;
712		break;
713	}
714	pitch = tex_res->surface->level[0].nblk_x >> 3;
715	tile_split = tex_res->surface->tile_split;
716	macro_aspect = tex_res->surface->mtilea;
717	bankw = tex_res->surface->bankw;
718	bankh = tex_res->surface->bankh;
719	tile_split = eg_tile_split(tile_split);
720	macro_aspect = eg_macro_tile_aspect(macro_aspect);
721	bankw = eg_bank_wh(bankw);
722	bankh = eg_bank_wh(bankh);
723    } else
724#endif
725    {
726	array_mode = tex_res->array_mode;
727	pitch = (tex_res->pitch + 7) >> 3;
728	tile_split = 4;
729	macro_aspect = 0;
730	bankw = 0;
731	bankh = 0;
732    }
733    nbanks = info->num_banks;
734    nbanks = eg_nbanks(nbanks);
735
736    sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
737
738    if (tex_res->w)
739	sq_tex_resource_word0 |= ( ((pitch - 1) << PITCH_shift) |
740				   ((tex_res->w - 1) << TEX_WIDTH_shift) );
741
742    if (tex_res->tile_type)
743	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
744
745    sq_tex_resource_word1 = (array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
746
747    if (tex_res->h)
748	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
749    if (tex_res->depth)
750	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
751
752    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
753			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
754			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
755			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
756			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
757			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
758			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
759			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
760			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
761			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
762			     (tex_res->base_level << BASE_LEVEL_shift));
763
764    if (tex_res->srf_mode_all)
765	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
766    if (tex_res->force_degamma)
767	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
768
769    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
770			     (tex_res->base_array << BASE_ARRAY_shift) |
771			     (tex_res->last_array << LAST_ARRAY_shift));
772
773    sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
774			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
775			     (tile_split << SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift));
776
777    if (tex_res->interlaced)
778	sq_tex_resource_word6 |= INTERLACED_bit;
779
780    sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
781			     (macro_aspect << SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift) |
782			     (nbanks << SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift) |
783			     (bankw << SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift) |
784			     (bankh << SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift) |
785			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
786
787    /* flush texture cache */
788    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
789				  tex_res->size, tex_res->base,
790				  tex_res->bo, domain, 0);
791
792    BEGIN_BATCH(10 + 4);
793    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
794    E32(sq_tex_resource_word0);
795    E32(sq_tex_resource_word1);
796    E32(((tex_res->base) >> 8));
797    E32(((tex_res->mip_base) >> 8));
798    E32(sq_tex_resource_word4);
799    E32(sq_tex_resource_word5);
800    E32(sq_tex_resource_word6);
801    E32(sq_tex_resource_word7);
802    RELOC_BATCH(tex_res->bo, domain, 0);
803    RELOC_BATCH(tex_res->mip_bo, domain, 0);
804    END_BATCH();
805}
806
807/* cayman has some minor differences in SQ_TEX_SAMPLER_WORD0_0,
808 * but none that we use here.
809 */
810void
811evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
812{
813    RADEONInfoPtr info = RADEONPTR(pScrn);
814    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
815
816    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
817			    (s->clamp_y       << CLAMP_Y_shift)					|
818			    (s->clamp_z       << CLAMP_Z_shift)					|
819			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
820			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
821			    (s->z_filter      << Z_FILTER_shift)	|
822			    (s->mip_filter    << MIP_FILTER_shift)				|
823			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
824			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
825			    (s->chroma_key    << CHROMA_KEY_shift));
826
827    sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
828			    (s->max_lod       << MAX_LOD_shift)					|
829			    (s->perf_mip      << PERF_MIP_shift)	|
830			    (s->perf_z        << PERF_Z_shift));
831
832
833    sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
834			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
835
836    if (s->mc_coord_truncate)
837	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
838    if (s->force_degamma)
839	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
840    if (s->truncate_coord)
841	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
842    if (s->disable_cube_wrap)
843	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
844    if (s->type)
845	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
846
847    BEGIN_BATCH(5);
848    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
849    E32(sq_tex_sampler_word0);
850    E32(sq_tex_sampler_word1);
851    E32(sq_tex_sampler_word2);
852    END_BATCH();
853}
854
855/* workarounds for hw bugs in eg+ */
856/* only affects screen/window/generic/vport.  cliprects are not affected */
857static void
858evergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn, int *x1, int *y1, int *x2, int *y2)
859{
860    RADEONInfoPtr info = RADEONPTR(pScrn);
861
862    /* all eg+ asics */
863    if (*x2 == 0)
864	*x1 = 1;
865    if (*y2 == 0)
866	*y1 = 1;
867
868    /* cayman/tn only */
869    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
870	/* cliprects aren't affected so we can use them to clip if we need
871	 * a true 1x1 clip region
872	 */
873	if ((*x2 == 1) && (*y2 == 1))
874	    *x2 = 2;
875    }
876}
877
878//XXX deal with clip offsets in clip setup
879void
880evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
881{
882    RADEONInfoPtr info = RADEONPTR(pScrn);
883
884    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
885
886    BEGIN_BATCH(4);
887    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
888    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
889	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
890    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
891	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
892    END_BATCH();
893}
894
895void
896evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
897{
898    RADEONInfoPtr info = RADEONPTR(pScrn);
899
900    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
901
902    BEGIN_BATCH(4);
903    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
904    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
905	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
906	 WINDOW_OFFSET_DISABLE_bit));
907    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
908	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
909    END_BATCH();
910}
911
912void
913evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
914{
915    RADEONInfoPtr info = RADEONPTR(pScrn);
916
917    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
918
919    BEGIN_BATCH(4);
920    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
921    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
922	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
923	 WINDOW_OFFSET_DISABLE_bit));
924    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
925	 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
926    END_BATCH();
927}
928
929void
930evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
931{
932    RADEONInfoPtr info = RADEONPTR(pScrn);
933
934    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
935
936    BEGIN_BATCH(4);
937    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
938    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
939	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
940	 WINDOW_OFFSET_DISABLE_bit));
941    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
942	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
943    END_BATCH();
944}
945
946void
947evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
948{
949    RADEONInfoPtr info = RADEONPTR(pScrn);
950
951    BEGIN_BATCH(4);
952    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
953    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
954	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
955    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
956	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
957    END_BATCH();
958}
959
960/*
961 * Setup of default state
962 */
963
964void
965evergreen_set_default_state(ScrnInfoPtr pScrn)
966{
967    tex_resource_t tex_res;
968    shader_config_t fs_conf;
969    sq_config_t sq_conf;
970    int i;
971    RADEONInfoPtr info = RADEONPTR(pScrn);
972    struct radeon_accel_state *accel_state = info->accel_state;
973
974    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
975	cayman_set_default_state(pScrn);
976	return;
977    }
978
979    if (accel_state->XInited3D)
980	return;
981
982    memset(&tex_res, 0, sizeof(tex_resource_t));
983    memset(&fs_conf, 0, sizeof(shader_config_t));
984
985    accel_state->XInited3D = TRUE;
986
987    evergreen_start_3d(pScrn);
988
989    /* SQ */
990    sq_conf.ps_prio = 0;
991    sq_conf.vs_prio = 1;
992    sq_conf.gs_prio = 2;
993    sq_conf.es_prio = 3;
994    sq_conf.hs_prio = 0;
995    sq_conf.ls_prio = 0;
996    sq_conf.cs_prio = 0;
997
998    switch (info->ChipFamily) {
999    case CHIP_FAMILY_CEDAR:
1000    default:
1001	sq_conf.num_ps_gprs = 93;
1002	sq_conf.num_vs_gprs = 46;
1003	sq_conf.num_temp_gprs = 4;
1004	sq_conf.num_gs_gprs = 31;
1005	sq_conf.num_es_gprs = 31;
1006	sq_conf.num_hs_gprs = 23;
1007	sq_conf.num_ls_gprs = 23;
1008	sq_conf.num_ps_threads = 96;
1009	sq_conf.num_vs_threads = 16;
1010	sq_conf.num_gs_threads = 16;
1011	sq_conf.num_es_threads = 16;
1012	sq_conf.num_hs_threads = 16;
1013	sq_conf.num_ls_threads = 16;
1014	sq_conf.num_ps_stack_entries = 42;
1015	sq_conf.num_vs_stack_entries = 42;
1016	sq_conf.num_gs_stack_entries = 42;
1017	sq_conf.num_es_stack_entries = 42;
1018	sq_conf.num_hs_stack_entries = 42;
1019	sq_conf.num_ls_stack_entries = 42;
1020	break;
1021    case CHIP_FAMILY_REDWOOD:
1022	sq_conf.num_ps_gprs = 93;
1023	sq_conf.num_vs_gprs = 46;
1024	sq_conf.num_temp_gprs = 4;
1025	sq_conf.num_gs_gprs = 31;
1026	sq_conf.num_es_gprs = 31;
1027	sq_conf.num_hs_gprs = 23;
1028	sq_conf.num_ls_gprs = 23;
1029	sq_conf.num_ps_threads = 128;
1030	sq_conf.num_vs_threads = 20;
1031	sq_conf.num_gs_threads = 20;
1032	sq_conf.num_es_threads = 20;
1033	sq_conf.num_hs_threads = 20;
1034	sq_conf.num_ls_threads = 20;
1035	sq_conf.num_ps_stack_entries = 42;
1036	sq_conf.num_vs_stack_entries = 42;
1037	sq_conf.num_gs_stack_entries = 42;
1038	sq_conf.num_es_stack_entries = 42;
1039	sq_conf.num_hs_stack_entries = 42;
1040	sq_conf.num_ls_stack_entries = 42;
1041	break;
1042    case CHIP_FAMILY_JUNIPER:
1043	sq_conf.num_ps_gprs = 93;
1044	sq_conf.num_vs_gprs = 46;
1045	sq_conf.num_temp_gprs = 4;
1046	sq_conf.num_gs_gprs = 31;
1047	sq_conf.num_es_gprs = 31;
1048	sq_conf.num_hs_gprs = 23;
1049	sq_conf.num_ls_gprs = 23;
1050	sq_conf.num_ps_threads = 128;
1051	sq_conf.num_vs_threads = 20;
1052	sq_conf.num_gs_threads = 20;
1053	sq_conf.num_es_threads = 20;
1054	sq_conf.num_hs_threads = 20;
1055	sq_conf.num_ls_threads = 20;
1056	sq_conf.num_ps_stack_entries = 85;
1057	sq_conf.num_vs_stack_entries = 85;
1058	sq_conf.num_gs_stack_entries = 85;
1059	sq_conf.num_es_stack_entries = 85;
1060	sq_conf.num_hs_stack_entries = 85;
1061	sq_conf.num_ls_stack_entries = 85;
1062	break;
1063    case CHIP_FAMILY_CYPRESS:
1064    case CHIP_FAMILY_HEMLOCK:
1065	sq_conf.num_ps_gprs = 93;
1066	sq_conf.num_vs_gprs = 46;
1067	sq_conf.num_temp_gprs = 4;
1068	sq_conf.num_gs_gprs = 31;
1069	sq_conf.num_es_gprs = 31;
1070	sq_conf.num_hs_gprs = 23;
1071	sq_conf.num_ls_gprs = 23;
1072	sq_conf.num_ps_threads = 128;
1073	sq_conf.num_vs_threads = 20;
1074	sq_conf.num_gs_threads = 20;
1075	sq_conf.num_es_threads = 20;
1076	sq_conf.num_hs_threads = 20;
1077	sq_conf.num_ls_threads = 20;
1078	sq_conf.num_ps_stack_entries = 85;
1079	sq_conf.num_vs_stack_entries = 85;
1080	sq_conf.num_gs_stack_entries = 85;
1081	sq_conf.num_es_stack_entries = 85;
1082	sq_conf.num_hs_stack_entries = 85;
1083	sq_conf.num_ls_stack_entries = 85;
1084	break;
1085    case CHIP_FAMILY_PALM:
1086	sq_conf.num_ps_gprs = 93;
1087	sq_conf.num_vs_gprs = 46;
1088	sq_conf.num_temp_gprs = 4;
1089	sq_conf.num_gs_gprs = 31;
1090	sq_conf.num_es_gprs = 31;
1091	sq_conf.num_hs_gprs = 23;
1092	sq_conf.num_ls_gprs = 23;
1093	sq_conf.num_ps_threads = 96;
1094	sq_conf.num_vs_threads = 16;
1095	sq_conf.num_gs_threads = 16;
1096	sq_conf.num_es_threads = 16;
1097	sq_conf.num_hs_threads = 16;
1098	sq_conf.num_ls_threads = 16;
1099	sq_conf.num_ps_stack_entries = 42;
1100	sq_conf.num_vs_stack_entries = 42;
1101	sq_conf.num_gs_stack_entries = 42;
1102	sq_conf.num_es_stack_entries = 42;
1103	sq_conf.num_hs_stack_entries = 42;
1104	sq_conf.num_ls_stack_entries = 42;
1105	break;
1106    case CHIP_FAMILY_SUMO:
1107	sq_conf.num_ps_gprs = 93;
1108	sq_conf.num_vs_gprs = 46;
1109	sq_conf.num_temp_gprs = 4;
1110	sq_conf.num_gs_gprs = 31;
1111	sq_conf.num_es_gprs = 31;
1112	sq_conf.num_hs_gprs = 23;
1113	sq_conf.num_ls_gprs = 23;
1114	sq_conf.num_ps_threads = 96;
1115	sq_conf.num_vs_threads = 25;
1116	sq_conf.num_gs_threads = 25;
1117	sq_conf.num_es_threads = 25;
1118	sq_conf.num_hs_threads = 25;
1119	sq_conf.num_ls_threads = 25;
1120	sq_conf.num_ps_stack_entries = 42;
1121	sq_conf.num_vs_stack_entries = 42;
1122	sq_conf.num_gs_stack_entries = 42;
1123	sq_conf.num_es_stack_entries = 42;
1124	sq_conf.num_hs_stack_entries = 42;
1125	sq_conf.num_ls_stack_entries = 42;
1126	break;
1127    case CHIP_FAMILY_SUMO2:
1128	sq_conf.num_ps_gprs = 93;
1129	sq_conf.num_vs_gprs = 46;
1130	sq_conf.num_temp_gprs = 4;
1131	sq_conf.num_gs_gprs = 31;
1132	sq_conf.num_es_gprs = 31;
1133	sq_conf.num_hs_gprs = 23;
1134	sq_conf.num_ls_gprs = 23;
1135	sq_conf.num_ps_threads = 96;
1136	sq_conf.num_vs_threads = 25;
1137	sq_conf.num_gs_threads = 25;
1138	sq_conf.num_es_threads = 25;
1139	sq_conf.num_hs_threads = 25;
1140	sq_conf.num_ls_threads = 25;
1141	sq_conf.num_ps_stack_entries = 85;
1142	sq_conf.num_vs_stack_entries = 85;
1143	sq_conf.num_gs_stack_entries = 85;
1144	sq_conf.num_es_stack_entries = 85;
1145	sq_conf.num_hs_stack_entries = 85;
1146	sq_conf.num_ls_stack_entries = 85;
1147	break;
1148    case CHIP_FAMILY_BARTS:
1149	sq_conf.num_ps_gprs = 93;
1150	sq_conf.num_vs_gprs = 46;
1151	sq_conf.num_temp_gprs = 4;
1152	sq_conf.num_gs_gprs = 31;
1153	sq_conf.num_es_gprs = 31;
1154	sq_conf.num_hs_gprs = 23;
1155	sq_conf.num_ls_gprs = 23;
1156	sq_conf.num_ps_threads = 128;
1157	sq_conf.num_vs_threads = 20;
1158	sq_conf.num_gs_threads = 20;
1159	sq_conf.num_es_threads = 20;
1160	sq_conf.num_hs_threads = 20;
1161	sq_conf.num_ls_threads = 20;
1162	sq_conf.num_ps_stack_entries = 85;
1163	sq_conf.num_vs_stack_entries = 85;
1164	sq_conf.num_gs_stack_entries = 85;
1165	sq_conf.num_es_stack_entries = 85;
1166	sq_conf.num_hs_stack_entries = 85;
1167	sq_conf.num_ls_stack_entries = 85;
1168	break;
1169    case CHIP_FAMILY_TURKS:
1170	sq_conf.num_ps_gprs = 93;
1171	sq_conf.num_vs_gprs = 46;
1172	sq_conf.num_temp_gprs = 4;
1173	sq_conf.num_gs_gprs = 31;
1174	sq_conf.num_es_gprs = 31;
1175	sq_conf.num_hs_gprs = 23;
1176	sq_conf.num_ls_gprs = 23;
1177	sq_conf.num_ps_threads = 128;
1178	sq_conf.num_vs_threads = 20;
1179	sq_conf.num_gs_threads = 20;
1180	sq_conf.num_es_threads = 20;
1181	sq_conf.num_hs_threads = 20;
1182	sq_conf.num_ls_threads = 20;
1183	sq_conf.num_ps_stack_entries = 42;
1184	sq_conf.num_vs_stack_entries = 42;
1185	sq_conf.num_gs_stack_entries = 42;
1186	sq_conf.num_es_stack_entries = 42;
1187	sq_conf.num_hs_stack_entries = 42;
1188	sq_conf.num_ls_stack_entries = 42;
1189	break;
1190    case CHIP_FAMILY_CAICOS:
1191	sq_conf.num_ps_gprs = 93;
1192	sq_conf.num_vs_gprs = 46;
1193	sq_conf.num_temp_gprs = 4;
1194	sq_conf.num_gs_gprs = 31;
1195	sq_conf.num_es_gprs = 31;
1196	sq_conf.num_hs_gprs = 23;
1197	sq_conf.num_ls_gprs = 23;
1198	sq_conf.num_ps_threads = 128;
1199	sq_conf.num_vs_threads = 10;
1200	sq_conf.num_gs_threads = 10;
1201	sq_conf.num_es_threads = 10;
1202	sq_conf.num_hs_threads = 10;
1203	sq_conf.num_ls_threads = 10;
1204	sq_conf.num_ps_stack_entries = 42;
1205	sq_conf.num_vs_stack_entries = 42;
1206	sq_conf.num_gs_stack_entries = 42;
1207	sq_conf.num_es_stack_entries = 42;
1208	sq_conf.num_hs_stack_entries = 42;
1209	sq_conf.num_ls_stack_entries = 42;
1210	break;
1211    }
1212
1213    evergreen_sq_setup(pScrn, &sq_conf);
1214
1215    BEGIN_BATCH(27);
1216    EREG(SQ_LDS_ALLOC_PS, 0);
1217    EREG(SQ_LDS_RESOURCE_MGMT, 0x10001000);
1218    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
1219
1220    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
1221    E32(0);
1222    E32(0);
1223    E32(0);
1224    E32(0);
1225    E32(0);
1226    E32(0);
1227
1228    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
1229    E32(0);
1230    E32(0);
1231    E32(0);
1232    E32(0);
1233
1234    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
1235    E32(0);
1236    E32(0);
1237    END_BATCH();
1238
1239    /* DB */
1240    BEGIN_BATCH(3 + 2);
1241    EREG(DB_Z_INFO,                           0);
1242    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1243    END_BATCH();
1244
1245    BEGIN_BATCH(3 + 2);
1246    EREG(DB_STENCIL_INFO,                     0);
1247    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1248    END_BATCH();
1249
1250    BEGIN_BATCH(3 + 2);
1251    EREG(DB_HTILE_DATA_BASE,                    0);
1252    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1253    END_BATCH();
1254
1255    BEGIN_BATCH(49);
1256    EREG(DB_DEPTH_CONTROL,                    0);
1257
1258    PACK0(PA_SC_VPORT_ZMIN_0, 2);
1259    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
1260    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
1261
1262    PACK0(DB_RENDER_CONTROL, 5);
1263    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
1264    E32(0); // DB_COUNT_CONTROL
1265    E32(0); // DB_DEPTH_VIEW
1266    E32(0x2a); // DB_RENDER_OVERRIDE
1267    E32(0); // DB_RENDER_OVERRIDE2
1268
1269    PACK0(DB_STENCIL_CLEAR, 2);
1270    E32(0); // DB_STENCIL_CLEAR
1271    E32(0); // DB_DEPTH_CLEAR
1272
1273    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
1274					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
1275					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
1276					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
1277
1278    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
1279			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1280
1281    // SX
1282    EREG(SX_MISC,               0);
1283
1284    // CB
1285    PACK0(SX_ALPHA_TEST_CONTROL, 5);
1286    E32(0); // SX_ALPHA_TEST_CONTROL
1287    E32(0x00000000); //CB_BLEND_RED
1288    E32(0x00000000); //CB_BLEND_GREEN
1289    E32(0x00000000); //CB_BLEND_BLUE
1290    E32(0x00000000); //CB_BLEND_ALPHA
1291
1292    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
1293
1294    // SC
1295    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1296					       (0 << WINDOW_Y_OFFSET_shift)));
1297    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1298    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
1299    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1300    END_BATCH();
1301
1302    /* clip boolean is set to always visible -> doesn't matter */
1303    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1304	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1305
1306    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1307	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1308
1309    BEGIN_BATCH(57);
1310    PACK0(PA_SC_MODE_CNTL_0, 2);
1311    E32(0); // PA_SC_MODE_CNTL_0
1312    E32(0); // PA_SC_MODE_CNTL_1
1313
1314    PACK0(PA_SC_LINE_CNTL, 16);
1315    E32(0); // PA_SC_LINE_CNTL
1316    E32(0); // PA_SC_AA_CONFIG
1317    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1318	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1319    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1320    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1321    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1322    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1323    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1324    E32(0);
1325    E32(0);
1326    E32(0);
1327    E32(0);
1328    E32(0);
1329    E32(0);
1330    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1331    E32(0xFFFFFFFF); // PA_SC_AA_MASK
1332
1333    // CL
1334    PACK0(PA_CL_CLIP_CNTL, 8);
1335    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1336    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1337    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1338    E32(0); // PA_CL_VS_OUT_CNTL
1339    E32(0); // PA_CL_NANINF_CNTL
1340    E32(0); // PA_SU_LINE_STIPPLE_CNTL
1341    E32(0); // PA_SU_LINE_STIPPLE_SCALE
1342    E32(0); // PA_SU_PRIM_FILTER_CNTL
1343
1344    // SU
1345    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1346    E32(0);
1347    E32(0);
1348    E32(0);
1349    E32(0);
1350    E32(0);
1351    E32(0);
1352
1353    /* src = semantic id 0; mask = semantic id 1 */
1354    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1355			   (1 << SEMANTIC_1_shift)));
1356    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1357    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1358    E32(((0    << SEMANTIC_shift)	|
1359	 (0x01 << DEFAULT_VAL_shift)));
1360    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1361    E32(((1    << SEMANTIC_shift)	|
1362	 (0x01 << DEFAULT_VAL_shift)));
1363
1364    PACK0(SPI_INPUT_Z, 8);
1365    E32(0); // SPI_INPUT_Z
1366    E32(0); // SPI_FOG_CNTL
1367    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1368    E32(0); // SPI_PS_IN_CONTROL_2
1369    E32(0);
1370    E32(0);
1371    E32(0);
1372    E32(0);
1373    END_BATCH();
1374
1375    // clear FS
1376    fs_conf.bo = accel_state->shaders_bo;
1377    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1378
1379    // VGT
1380    BEGIN_BATCH(46);
1381
1382    PACK0(VGT_MAX_VTX_INDX, 4);
1383    E32(0xffffff);
1384    E32(0);
1385    E32(0);
1386    E32(0);
1387
1388    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1389    E32(0);
1390    E32(0);
1391
1392    PACK0(VGT_REUSE_OFF, 2);
1393    E32(0);
1394    E32(0);
1395
1396    PACK0(PA_SU_POINT_SIZE, 17);
1397    E32(0); // PA_SU_POINT_SIZE
1398    E32(0); // PA_SU_POINT_MINMAX
1399    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1400    E32(0); // PA_SC_LINE_STIPPLE
1401    E32(0); // VGT_OUTPUT_PATH_CNTL
1402    E32(0); // VGT_HOS_CNTL
1403    E32(0);
1404    E32(0);
1405    E32(0);
1406    E32(0);
1407    E32(0);
1408    E32(0);
1409    E32(0);
1410    E32(0);
1411    E32(0);
1412    E32(0);
1413    E32(0); // VGT_GS_MODE
1414
1415    EREG(VGT_PRIMITIVEID_EN,                  0);
1416    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1417    EREG(VGT_SHADER_STAGES_EN,          0);
1418
1419    PACK0(VGT_STRMOUT_CONFIG, 2);
1420    E32(0);
1421    E32(0);
1422    END_BATCH();
1423}
1424
1425
1426/*
1427 * Commands
1428 */
1429
1430void
1431evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1432{
1433    RADEONInfoPtr info = RADEONPTR(pScrn);
1434
1435    BEGIN_BATCH(10);
1436    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1437    PACK3(IT_INDEX_TYPE, 1);
1438#if X_BYTE_ORDER == X_BIG_ENDIAN
1439    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1440#else
1441    E32(draw_conf->index_type);
1442#endif
1443    PACK3(IT_NUM_INSTANCES, 1);
1444    E32(draw_conf->num_instances);
1445    PACK3(IT_DRAW_INDEX_AUTO, 2);
1446    E32(draw_conf->num_indices);
1447    E32(draw_conf->vgt_draw_initiator);
1448    END_BATCH();
1449}
1450
1451void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1452{
1453    RADEONInfoPtr info = RADEONPTR(pScrn);
1454    struct radeon_accel_state *accel_state = info->accel_state;
1455    draw_config_t   draw_conf;
1456    vtx_resource_t  vtx_res;
1457
1458    if (accel_state->vbo.vb_start_op == -1)
1459      return;
1460
1461    CLEAR (draw_conf);
1462    CLEAR (vtx_res);
1463
1464    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1465	radeon_ib_discard(pScrn);
1466	radeon_cs_flush_indirect(pScrn);
1467	return;
1468    }
1469
1470    /* Vertex buffer setup */
1471    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1472    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
1473    vtx_res.vtx_size_dw     = vtx_size / 4;
1474    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1475    vtx_res.vb_addr         = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
1476    vtx_res.bo              = accel_state->vbo.vb_bo;
1477    vtx_res.dst_sel_x       = SQ_SEL_X;
1478    vtx_res.dst_sel_y       = SQ_SEL_Y;
1479    vtx_res.dst_sel_z       = SQ_SEL_Z;
1480    vtx_res.dst_sel_w       = SQ_SEL_W;
1481#if X_BYTE_ORDER == X_BIG_ENDIAN
1482    vtx_res.endian          = SQ_ENDIAN_8IN32;
1483#endif
1484    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1485
1486    /* Draw */
1487    draw_conf.prim_type          = DI_PT_RECTLIST;
1488    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1489    draw_conf.num_instances      = 1;
1490    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1491    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1492
1493    evergreen_draw_auto(pScrn, &draw_conf);
1494
1495    /* sync dst surface */
1496    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1497				  accel_state->dst_size, accel_state->dst_obj.offset,
1498				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1499
1500    accel_state->vbo.vb_start_op = -1;
1501    accel_state->cbuf.vb_start_op = -1;
1502    accel_state->ib_reset_op = 0;
1503
1504}
1505
1506#endif
1507