evergreen_accel.c revision de2362d3
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
30#include "xf86.h"
31
32#include <errno.h>
33
34#include "radeon.h"
35#include "evergreen_shader.h"
36#include "radeon_reg.h"
37#include "evergreen_reg.h"
38#include "evergreen_state.h"
39
40#include "radeon_vbo.h"
41#include "radeon_exa_shared.h"
42
43static const uint32_t EVERGREEN_ROP[16] = {
44    RADEON_ROP3_ZERO, /* GXclear        */
45    RADEON_ROP3_DSa,  /* Gxand          */
46    RADEON_ROP3_SDna, /* GXandReverse   */
47    RADEON_ROP3_S,    /* GXcopy         */
48    RADEON_ROP3_DSna, /* GXandInverted  */
49    RADEON_ROP3_D,    /* GXnoop         */
50    RADEON_ROP3_DSx,  /* GXxor          */
51    RADEON_ROP3_DSo,  /* GXor           */
52    RADEON_ROP3_DSon, /* GXnor          */
53    RADEON_ROP3_DSxn, /* GXequiv        */
54    RADEON_ROP3_Dn,   /* GXinvert       */
55    RADEON_ROP3_SDno, /* GXorReverse    */
56    RADEON_ROP3_Sn,   /* GXcopyInverted */
57    RADEON_ROP3_DSno, /* GXorInverted   */
58    RADEON_ROP3_DSan, /* GXnand         */
59    RADEON_ROP3_ONE,  /* GXset          */
60};
61
62void
63evergreen_start_3d(ScrnInfoPtr pScrn)
64{
65    RADEONInfoPtr info = RADEONPTR(pScrn);
66
67    BEGIN_BATCH(3);
68    PACK3(IT_CONTEXT_CONTROL, 2);
69    E32(0x80000000);
70    E32(0x80000000);
71    END_BATCH();
72
73}
74
/*
 * Translate a tile split value into its register field encoding:
 * 64 -> 0, 128 -> 1, 256 -> 2, 512 -> 3, 1024 -> 4, 2048 -> 5.
 * 4096 and every unrecognized value map to 6.
 */
unsigned eg_tile_split(unsigned tile_split)
{
	static const unsigned known_splits[] = {
		64, 128, 256, 512, 1024, 2048
	};
	unsigned code;

	for (code = 0; code < sizeof(known_splits) / sizeof(known_splits[0]); code++) {
		if (known_splits[code] == tile_split)
			return code;
	}

	/* 4096 shares its encoding with the fallback */
	return 6;
}
89
/*
 * Translate a macro tile aspect value into its register field encoding:
 * 2 -> 1, 4 -> 2, 8 -> 3.  1 and every unrecognized value map to 0.
 */
static unsigned eg_macro_tile_aspect(unsigned macro_tile_aspect)
{
	if (macro_tile_aspect == 2)
		return 1;
	if (macro_tile_aspect == 4)
		return 2;
	if (macro_tile_aspect == 8)
		return 3;

	/* 1 and anything else */
	return 0;
}
101
/*
 * Translate a bank width/height value into its register field encoding:
 * 2 -> 1, 4 -> 2, 8 -> 3.  1 and every unrecognized value map to 0.
 */
static unsigned eg_bank_wh(unsigned bankwh)
{
	unsigned code;

	/* the encodings 1..3 correspond to the values 2, 4 and 8 */
	for (code = 1; code <= 3; code++) {
		if (bankwh == (1u << code))
			return code;
	}

	return 0;
}
113
/*
 * Translate a bank count into its register field encoding:
 * 4 -> 1, 8 -> 2, 16 -> 3.  2 and every unrecognized value map to 0.
 */
static unsigned eg_nbanks(unsigned nbanks)
{
	return (nbanks == 4)  ? 1 :
	       (nbanks == 8)  ? 2 :
	       (nbanks == 16) ? 3 : 0;
}
125
126/*
127 * Setup of functional groups
128 */
129
130// asic stack/thread/gpr limits - need to query the drm
131static void
132evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
133{
134    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
135    uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
136    uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
137    RADEONInfoPtr info = RADEONPTR(pScrn);
138
139    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
140	(info->ChipFamily == CHIP_FAMILY_PALM) ||
141	(info->ChipFamily == CHIP_FAMILY_SUMO) ||
142	(info->ChipFamily == CHIP_FAMILY_SUMO2) ||
143	(info->ChipFamily == CHIP_FAMILY_CAICOS))
144	sq_config = 0;
145    else
146	sq_config = VC_ENABLE_bit;
147
148    sq_config |= (EXPORT_SRC_C_bit |
149		  (sq_conf->cs_prio << CS_PRIO_shift) |
150		  (sq_conf->ls_prio << LS_PRIO_shift) |
151		  (sq_conf->hs_prio << HS_PRIO_shift) |
152		  (sq_conf->ps_prio << PS_PRIO_shift) |
153		  (sq_conf->vs_prio << VS_PRIO_shift) |
154		  (sq_conf->gs_prio << GS_PRIO_shift) |
155		  (sq_conf->es_prio << ES_PRIO_shift));
156
157    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
158			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
159			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
160    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
161			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
162    sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
163			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
164
165    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
166			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
167			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
168			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
169    sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
170				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
171
172    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
173				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
174
175    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
176				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
177
178    sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
179				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
180
181    BEGIN_BATCH(16);
182    /* disable dyn gprs */
183    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
184    PACK0(SQ_CONFIG, 4);
185    E32(sq_config);
186    E32(sq_gpr_resource_mgmt_1);
187    E32(sq_gpr_resource_mgmt_2);
188    E32(sq_gpr_resource_mgmt_3);
189    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
190    E32(sq_thread_resource_mgmt);
191    E32(sq_thread_resource_mgmt_2);
192    E32(sq_stack_resource_mgmt_1);
193    E32(sq_stack_resource_mgmt_2);
194    E32(sq_stack_resource_mgmt_3);
195    END_BATCH();
196}
197
198/* cayman has some minor differences in CB_COLOR*_INFO and _ATTRIB, but none that
199 * we use here.
200 */
201void
202evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
203{
204    uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim;
205    unsigned pitch, slice, w, h, array_mode, nbanks;
206    uint32_t tile_split, macro_aspect, bankw, bankh;
207    RADEONInfoPtr info = RADEONPTR(pScrn);
208
209    if (cb_conf->surface) {
210	switch (cb_conf->surface->level[0].mode) {
211	case RADEON_SURF_MODE_1D:
212		array_mode = 2;
213		break;
214	case RADEON_SURF_MODE_2D:
215		array_mode = 4;
216		break;
217	default:
218		array_mode = 0;
219		break;
220	}
221	w = cb_conf->surface->level[0].npix_x;
222	h = cb_conf->surface->level[0].npix_y;
223	pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
224	slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
225	tile_split = cb_conf->surface->tile_split;
226	macro_aspect = cb_conf->surface->mtilea;
227	bankw = cb_conf->surface->bankw;
228	bankh = cb_conf->surface->bankh;
229	tile_split = eg_tile_split(tile_split);
230	macro_aspect = eg_macro_tile_aspect(macro_aspect);
231	bankw = eg_bank_wh(bankw);
232	bankh = eg_bank_wh(bankh);
233    } else {
234	pitch = (cb_conf->w / 8) - 1;
235	h = RADEON_ALIGN(cb_conf->h, 8);
236	slice = ((cb_conf->w * h) / 64) - 1;
237	array_mode = cb_conf->array_mode;
238	w = cb_conf->w;
239	tile_split = 4;
240	macro_aspect = 0;
241	bankw = 0;
242	bankh = 0;
243    }
244    nbanks = info->num_banks;
245    nbanks = eg_nbanks(nbanks);
246
247    cb_color_attrib |= (tile_split << CB_COLOR0_ATTRIB__TILE_SPLIT_shift)|
248		       (nbanks << CB_COLOR0_ATTRIB__NUM_BANKS_shift) |
249		       (bankw << CB_COLOR0_ATTRIB__BANK_WIDTH_shift) |
250		       (bankh << CB_COLOR0_ATTRIB__BANK_HEIGHT_shift) |
251		       (macro_aspect << CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift);
252    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
253		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
254		     (array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
255		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
256		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
257		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
258		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
259    if (cb_conf->blend_clamp)
260	cb_color_info |= BLEND_CLAMP_bit;
261    if (cb_conf->fast_clear)
262	cb_color_info |= FAST_CLEAR_bit;
263    if (cb_conf->compression)
264	cb_color_info |= COMPRESSION_bit;
265    if (cb_conf->blend_bypass)
266	cb_color_info |= BLEND_BYPASS_bit;
267    if (cb_conf->simple_float)
268	cb_color_info |= SIMPLE_FLOAT_bit;
269    if (cb_conf->round_mode)
270	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
271    if (cb_conf->tile_compact)
272	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
273    if (cb_conf->rat)
274	cb_color_info |= RAT_bit;
275
276    /* bit 4 needs to be set for linear and depth/stencil surfaces */
277    if (cb_conf->non_disp_tiling)
278	cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
279
280    switch (cb_conf->resource_type) {
281    case BUFFER:
282	/* number of elements in the surface */
283	cb_color_dim = pitch * slice;
284	break;
285    default:
286	/* w/h of the surface */
287	cb_color_dim = (((w - 1) << WIDTH_MAX_shift) |
288			((cb_conf->h - 1) << HEIGHT_MAX_shift));
289	break;
290    }
291
292    BEGIN_BATCH(3 + 2);
293    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
294    RELOC_BATCH(cb_conf->bo, 0, domain);
295    END_BATCH();
296
297    /* Set CMASK & FMASK buffer to the offset of color buffer as
298     * we don't use those this shouldn't cause any issue and we
299     * then have a valid cmd stream
300     */
301    BEGIN_BATCH(3 + 2);
302    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
303    RELOC_BATCH(cb_conf->bo, 0, domain);
304    END_BATCH();
305    BEGIN_BATCH(3 + 2);
306    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
307    RELOC_BATCH(cb_conf->bo, 0, domain);
308    END_BATCH();
309
310    /* tiling config */
311    BEGIN_BATCH(3 + 2);
312    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
313    RELOC_BATCH(cb_conf->bo, 0, domain);
314    END_BATCH();
315    BEGIN_BATCH(3 + 2);
316    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
317    RELOC_BATCH(cb_conf->bo, 0, domain);
318    END_BATCH();
319
320    BEGIN_BATCH(33);
321    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
322    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
323    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
324    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
325    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
326    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
327    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
328    E32(0);
329    E32(0);
330    E32(0);
331    E32(0);
332    EREG(CB_TARGET_MASK,                      (cb_conf->pmask << TARGET0_ENABLE_shift));
333    EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[cb_conf->rop] |
334					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
335    EREG(CB_BLEND0_CONTROL,                   cb_conf->blendcntl);
336    END_BATCH();
337}
338
339void evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color)
340{
341    RADEONInfoPtr info = RADEONPTR(pScrn);
342
343    BEGIN_BATCH(2 + 4);
344    PACK0(CB_BLEND_RED, 4);
345    EFLOAT(color[0]); /* R */
346    EFLOAT(color[1]); /* G */
347    EFLOAT(color[2]); /* B */
348    EFLOAT(color[3]); /* A */
349    END_BATCH();
350}
351
352static void
353evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
354			      uint32_t size, uint64_t mc_addr,
355			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
356{
357    RADEONInfoPtr info = RADEONPTR(pScrn);
358    uint32_t cp_coher_size;
359    if (size == 0xffffffff)
360	cp_coher_size = 0xffffffff;
361    else
362	cp_coher_size = ((size + 255) >> 8);
363
364    BEGIN_BATCH(5 + 2);
365    PACK3(IT_SURFACE_SYNC, 4);
366    E32(sync_type);
367    E32(cp_coher_size);
368    E32((mc_addr >> 8));
369    E32(10); /* poll interval */
370    RELOC_BATCH(bo, rdomains, wdomain);
371    END_BATCH();
372}
373
374/* inserts a wait for vline in the command stream */
375void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
376				  xf86CrtcPtr crtc, int start, int stop)
377{
378    RADEONInfoPtr  info = RADEONPTR(pScrn);
379    drmmode_crtc_private_ptr drmmode_crtc;
380
381    if (!crtc)
382        return;
383
384    drmmode_crtc = crtc->driver_private;
385
386    if (!crtc->enabled)
387        return;
388
389    if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
390        return;
391
392    start = max(start, crtc->y);
393    stop = min(stop, crtc->y + crtc->mode.VDisplay);
394
395    if (start >= stop)
396        return;
397
398    BEGIN_BATCH(11);
399    /* set the VLINE range */
400    EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
401	 (start << EVERGREEN_VLINE_START_SHIFT) |
402	 (stop << EVERGREEN_VLINE_END_SHIFT));
403
404    /* tell the CP to poll the VLINE state register */
405    PACK3(IT_WAIT_REG_MEM, 6);
406    E32(IT_WAIT_REG | IT_WAIT_EQ);
407    E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
408    E32(0);
409    E32(0);                          // Ref value
410    E32(EVERGREEN_VLINE_STAT);    // Mask
411    E32(10);                         // Wait interval
412    /* add crtc reloc */
413    PACK3(IT_NOP, 1);
414    E32(drmmode_crtc->mode_crtc->crtc_id);
415    END_BATCH();
416}
417
418void
419evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
420{
421    RADEONInfoPtr info = RADEONPTR(pScrn);
422
423    BEGIN_BATCH(8);
424    /* Interpolator setup */
425    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
426    PACK0(SPI_PS_IN_CONTROL_0, 3);
427    E32(((num_interp << NUM_INTERP_shift) |
428	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
429    E32(0); // SPI_PS_IN_CONTROL_1
430    E32(0); // SPI_INTERP_CONTROL_0
431    END_BATCH();
432}
433
434void
435evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
436{
437    RADEONInfoPtr info = RADEONPTR(pScrn);
438    uint32_t sq_pgm_resources;
439
440    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
441			(fs_conf->stack_size << STACK_SIZE_shift));
442
443    if (fs_conf->dx10_clamp)
444	sq_pgm_resources |= DX10_CLAMP_bit;
445
446    BEGIN_BATCH(3 + 2);
447    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
448    RELOC_BATCH(fs_conf->bo, domain, 0);
449    END_BATCH();
450
451    BEGIN_BATCH(3);
452    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
453    END_BATCH();
454}
455
456/* cayman has some minor differences in SQ_PGM_RESOUCES_VS and _RESOURCES_2_VS,
457 * but none that we use here.
458 */
459void
460evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
461{
462    RADEONInfoPtr info = RADEONPTR(pScrn);
463    uint32_t sq_pgm_resources, sq_pgm_resources_2;
464
465    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
466			(vs_conf->stack_size << STACK_SIZE_shift));
467
468    if (vs_conf->dx10_clamp)
469	sq_pgm_resources |= DX10_CLAMP_bit;
470    if (vs_conf->uncached_first_inst)
471	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
472
473    sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
474			  (vs_conf->double_round << DOUBLE_ROUND_shift));
475
476    if (vs_conf->allow_sdi)
477	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
478    if (vs_conf->allow_sd0)
479	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
480    if (vs_conf->allow_ddi)
481	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
482    if (vs_conf->allow_ddo)
483	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
484
485    /* flush SQ cache */
486    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
487				  vs_conf->shader_size, vs_conf->shader_addr,
488				  vs_conf->bo, domain, 0);
489
490    BEGIN_BATCH(3 + 2);
491    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
492    RELOC_BATCH(vs_conf->bo, domain, 0);
493    END_BATCH();
494
495    BEGIN_BATCH(4);
496    PACK0(SQ_PGM_RESOURCES_VS, 2);
497    E32(sq_pgm_resources);
498    E32(sq_pgm_resources_2);
499    END_BATCH();
500}
501
502/* cayman has some minor differences in SQ_PGM_RESOUCES_PS and _RESOURCES_2_PS,
503 * but none that we use here.
504 */
505void
506evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
507{
508    RADEONInfoPtr info = RADEONPTR(pScrn);
509    uint32_t sq_pgm_resources, sq_pgm_resources_2;
510
511    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
512			(ps_conf->stack_size << STACK_SIZE_shift));
513
514    if (ps_conf->dx10_clamp)
515	sq_pgm_resources |= DX10_CLAMP_bit;
516    if (ps_conf->uncached_first_inst)
517	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
518    if (ps_conf->clamp_consts)
519	sq_pgm_resources |= CLAMP_CONSTS_bit;
520
521    sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
522			  (ps_conf->double_round << DOUBLE_ROUND_shift));
523
524    if (ps_conf->allow_sdi)
525	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
526    if (ps_conf->allow_sd0)
527	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
528    if (ps_conf->allow_ddi)
529	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
530    if (ps_conf->allow_ddo)
531	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
532
533    /* flush SQ cache */
534    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
535				  ps_conf->shader_size, ps_conf->shader_addr,
536				  ps_conf->bo, domain, 0);
537
538    BEGIN_BATCH(3 + 2);
539    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
540    RELOC_BATCH(ps_conf->bo, domain, 0);
541    END_BATCH();
542
543    BEGIN_BATCH(5);
544    PACK0(SQ_PGM_RESOURCES_PS, 3);
545    E32(sq_pgm_resources);
546    E32(sq_pgm_resources_2);
547    E32(ps_conf->export_mode);
548    END_BATCH();
549}
550
551void
552evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
553{
554    RADEONInfoPtr info = RADEONPTR(pScrn);
555    /* size reg is units of 16 consts (4 dwords each) */
556    uint32_t size = const_conf->size_bytes >> 8;
557
558    if (size == 0)
559	size = 1;
560
561#if X_BYTE_ORDER == X_BIG_ENDIAN
562    {
563	    uint32_t count = size << 6, *p = const_conf->cpu_ptr;
564
565	    while(count--) {
566		    *p = cpu_to_le32(*p);
567		    p++;
568	    }
569    }
570#endif
571
572    /* flush SQ cache */
573    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
574				  const_conf->size_bytes, const_conf->const_addr,
575				  const_conf->bo, domain, 0);
576
577    switch (const_conf->type) {
578    case SHADER_TYPE_VS:
579	BEGIN_BATCH(3);
580	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
581	END_BATCH();
582	BEGIN_BATCH(3 + 2);
583	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
584	RELOC_BATCH(const_conf->bo, domain, 0);
585	END_BATCH();
586	break;
587    case SHADER_TYPE_PS:
588	BEGIN_BATCH(3);
589	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
590	END_BATCH();
591	BEGIN_BATCH(3 + 2);
592	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
593	RELOC_BATCH(const_conf->bo, domain, 0);
594	END_BATCH();
595	break;
596    default:
597	ErrorF("Unsupported const type %d\n", const_conf->type);
598	break;
599    }
600
601}
602
603void
604evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
605{
606    RADEONInfoPtr info = RADEONPTR(pScrn);
607    /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
608     * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
609     */
610    BEGIN_BATCH(3);
611    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
612    END_BATCH();
613}
614
615/* cayman has some minor differences in SQ_VTX_CONSTANT_WORD2_0 and _WORD3_0,
616 * but none that we use here.
617 */
618static void
619evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
620{
621    RADEONInfoPtr info = RADEONPTR(pScrn);
622    struct radeon_accel_state *accel_state = info->accel_state;
623    uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
624
625    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
626			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
627			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
628			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
629			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
630    if (res->clamp_x)
631	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
632
633    if (res->format_comp_all)
634	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
635
636    if (res->srf_mode_all)
637	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
638
639    sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
640			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
641			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
642			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
643
644    if (res->uncached)
645	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
646
647    /* XXX ??? */
648    sq_vtx_constant_word4 = 0;
649
650    /* flush vertex cache */
651    if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
652	(info->ChipFamily == CHIP_FAMILY_PALM) ||
653	(info->ChipFamily == CHIP_FAMILY_SUMO) ||
654	(info->ChipFamily == CHIP_FAMILY_SUMO2) ||
655	(info->ChipFamily == CHIP_FAMILY_CAICOS) ||
656	(info->ChipFamily == CHIP_FAMILY_CAYMAN) ||
657	(info->ChipFamily == CHIP_FAMILY_ARUBA))
658	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
659				      accel_state->vbo.vb_offset, 0,
660				      res->bo,
661				      domain, 0);
662    else
663	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
664				      accel_state->vbo.vb_offset, 0,
665				      res->bo,
666				      domain, 0);
667
668    BEGIN_BATCH(10 + 2);
669    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
670    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
671    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
672    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
673    E32(sq_vtx_constant_word3);		// 3: swizzles
674    E32(sq_vtx_constant_word4);		// 4: num elements
675    E32(0);							// 5: n/a
676    E32(0);							// 6: n/a
677    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
678    RELOC_BATCH(res->bo, domain, 0);
679    END_BATCH();
680}
681
682/* cayman has some minor differences in SQ_TEX_CONSTANT_WORD0_0 and _WORD4_0,
683 * but none that we use here.
684 */
685void
686evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
687{
688    RADEONInfoPtr info = RADEONPTR(pScrn);
689    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
690    uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
691    uint32_t array_mode, pitch, tile_split, macro_aspect, bankw, bankh, nbanks;
692
693    if (tex_res->surface) {
694	switch (tex_res->surface->level[0].mode) {
695	case RADEON_SURF_MODE_1D:
696		array_mode = 2;
697		break;
698	case RADEON_SURF_MODE_2D:
699		array_mode = 4;
700		break;
701	default:
702		array_mode = 0;
703		break;
704	}
705	pitch = tex_res->surface->level[0].nblk_x >> 3;
706	tile_split = tex_res->surface->tile_split;
707	macro_aspect = tex_res->surface->mtilea;
708	bankw = tex_res->surface->bankw;
709	bankh = tex_res->surface->bankh;
710	tile_split = eg_tile_split(tile_split);
711	macro_aspect = eg_macro_tile_aspect(macro_aspect);
712	bankw = eg_bank_wh(bankw);
713	bankh = eg_bank_wh(bankh);
714    } else {
715	array_mode = tex_res->array_mode;
716	pitch = (tex_res->pitch + 7) >> 3;
717	tile_split = 4;
718	macro_aspect = 0;
719	bankw = 0;
720	bankh = 0;
721    }
722    nbanks = info->num_banks;
723    nbanks = eg_nbanks(nbanks);
724
725    sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
726
727    if (tex_res->w)
728	sq_tex_resource_word0 |= ( ((pitch - 1) << PITCH_shift) |
729				   ((tex_res->w - 1) << TEX_WIDTH_shift) );
730
731    if (tex_res->tile_type)
732	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
733
734    sq_tex_resource_word1 = (array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
735
736    if (tex_res->h)
737	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
738    if (tex_res->depth)
739	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
740
741    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
742			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
743			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
744			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
745			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
746			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
747			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
748			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
749			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
750			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
751			     (tex_res->base_level << BASE_LEVEL_shift));
752
753    if (tex_res->srf_mode_all)
754	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
755    if (tex_res->force_degamma)
756	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
757
758    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
759			     (tex_res->base_array << BASE_ARRAY_shift) |
760			     (tex_res->last_array << LAST_ARRAY_shift));
761
762    sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
763			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
764			     (tile_split << SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift));
765
766    if (tex_res->interlaced)
767	sq_tex_resource_word6 |= INTERLACED_bit;
768
769    sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
770			     (macro_aspect << SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift) |
771			     (nbanks << SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift) |
772			     (bankw << SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift) |
773			     (bankh << SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift) |
774			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
775
776    /* flush texture cache */
777    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
778				  tex_res->size, tex_res->base,
779				  tex_res->bo, domain, 0);
780
781    BEGIN_BATCH(10 + 4);
782    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
783    E32(sq_tex_resource_word0);
784    E32(sq_tex_resource_word1);
785    E32(((tex_res->base) >> 8));
786    E32(((tex_res->mip_base) >> 8));
787    E32(sq_tex_resource_word4);
788    E32(sq_tex_resource_word5);
789    E32(sq_tex_resource_word6);
790    E32(sq_tex_resource_word7);
791    RELOC_BATCH(tex_res->bo, domain, 0);
792    RELOC_BATCH(tex_res->mip_bo, domain, 0);
793    END_BATCH();
794}
795
796/* cayman has some minor differences in SQ_TEX_SAMPLER_WORD0_0,
797 * but none that we use here.
798 */
799void
800evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
801{
802    RADEONInfoPtr info = RADEONPTR(pScrn);
803    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
804
805    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
806			    (s->clamp_y       << CLAMP_Y_shift)					|
807			    (s->clamp_z       << CLAMP_Z_shift)					|
808			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
809			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
810			    (s->z_filter      << Z_FILTER_shift)	|
811			    (s->mip_filter    << MIP_FILTER_shift)				|
812			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
813			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
814			    (s->chroma_key    << CHROMA_KEY_shift));
815
816    sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
817			    (s->max_lod       << MAX_LOD_shift)					|
818			    (s->perf_mip      << PERF_MIP_shift)	|
819			    (s->perf_z        << PERF_Z_shift));
820
821
822    sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
823			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
824
825    if (s->mc_coord_truncate)
826	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
827    if (s->force_degamma)
828	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
829    if (s->truncate_coord)
830	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
831    if (s->disable_cube_wrap)
832	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
833    if (s->type)
834	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
835
836    BEGIN_BATCH(5);
837    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
838    E32(sq_tex_sampler_word0);
839    E32(sq_tex_sampler_word1);
840    E32(sq_tex_sampler_word2);
841    END_BATCH();
842}
843
844/* workarounds for hw bugs in eg+ */
845/* only affects screen/window/generic/vport.  cliprects are not affected */
846static void
847evergreen_fix_scissor_coordinates(ScrnInfoPtr pScrn, int *x1, int *y1, int *x2, int *y2)
848{
849    RADEONInfoPtr info = RADEONPTR(pScrn);
850
851    /* all eg+ asics */
852    if (*x2 == 0)
853	*x1 = 1;
854    if (*y2 == 0)
855	*y1 = 1;
856
857    /* cayman/tn only */
858    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
859	/* cliprects aren't affected so we can use them to clip if we need
860	 * a true 1x1 clip region
861	 */
862	if ((*x2 == 1) && (*y2 == 1))
863	    *x2 = 2;
864    }
865}
866
867//XXX deal with clip offsets in clip setup
868void
869evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
870{
871    RADEONInfoPtr info = RADEONPTR(pScrn);
872
873    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
874
875    BEGIN_BATCH(4);
876    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
877    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
878	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
879    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
880	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
881    END_BATCH();
882}
883
884void
885evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
886{
887    RADEONInfoPtr info = RADEONPTR(pScrn);
888
889    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
890
891    BEGIN_BATCH(4);
892    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
893    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
894	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
895	 WINDOW_OFFSET_DISABLE_bit));
896    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
897	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
898    END_BATCH();
899}
900
901void
902evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
903{
904    RADEONInfoPtr info = RADEONPTR(pScrn);
905
906    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
907
908    BEGIN_BATCH(4);
909    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
910    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
911	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
912	 WINDOW_OFFSET_DISABLE_bit));
913    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
914	 (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
915    END_BATCH();
916}
917
918void
919evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
920{
921    RADEONInfoPtr info = RADEONPTR(pScrn);
922
923    evergreen_fix_scissor_coordinates(pScrn, &x1, &y1, &x2, &y2);
924
925    BEGIN_BATCH(4);
926    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
927    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
928	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
929	 WINDOW_OFFSET_DISABLE_bit));
930    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
931	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
932    END_BATCH();
933}
934
935void
936evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
937{
938    RADEONInfoPtr info = RADEONPTR(pScrn);
939
940    BEGIN_BATCH(4);
941    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
942    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
943	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
944    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
945	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
946    END_BATCH();
947}
948
949/*
950 * Setup of default state
951 */
952
953void
954evergreen_set_default_state(ScrnInfoPtr pScrn)
955{
956    tex_resource_t tex_res;
957    shader_config_t fs_conf;
958    sq_config_t sq_conf;
959    int i;
960    RADEONInfoPtr info = RADEONPTR(pScrn);
961    struct radeon_accel_state *accel_state = info->accel_state;
962
963    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
964	cayman_set_default_state(pScrn);
965	return;
966    }
967
968    if (accel_state->XInited3D)
969	return;
970
971    memset(&tex_res, 0, sizeof(tex_resource_t));
972    memset(&fs_conf, 0, sizeof(shader_config_t));
973
974    accel_state->XInited3D = TRUE;
975
976    evergreen_start_3d(pScrn);
977
978    /* SQ */
979    sq_conf.ps_prio = 0;
980    sq_conf.vs_prio = 1;
981    sq_conf.gs_prio = 2;
982    sq_conf.es_prio = 3;
983    sq_conf.hs_prio = 0;
984    sq_conf.ls_prio = 0;
985    sq_conf.cs_prio = 0;
986
987    switch (info->ChipFamily) {
988    case CHIP_FAMILY_CEDAR:
989    default:
990	sq_conf.num_ps_gprs = 93;
991	sq_conf.num_vs_gprs = 46;
992	sq_conf.num_temp_gprs = 4;
993	sq_conf.num_gs_gprs = 31;
994	sq_conf.num_es_gprs = 31;
995	sq_conf.num_hs_gprs = 23;
996	sq_conf.num_ls_gprs = 23;
997	sq_conf.num_ps_threads = 96;
998	sq_conf.num_vs_threads = 16;
999	sq_conf.num_gs_threads = 16;
1000	sq_conf.num_es_threads = 16;
1001	sq_conf.num_hs_threads = 16;
1002	sq_conf.num_ls_threads = 16;
1003	sq_conf.num_ps_stack_entries = 42;
1004	sq_conf.num_vs_stack_entries = 42;
1005	sq_conf.num_gs_stack_entries = 42;
1006	sq_conf.num_es_stack_entries = 42;
1007	sq_conf.num_hs_stack_entries = 42;
1008	sq_conf.num_ls_stack_entries = 42;
1009	break;
1010    case CHIP_FAMILY_REDWOOD:
1011	sq_conf.num_ps_gprs = 93;
1012	sq_conf.num_vs_gprs = 46;
1013	sq_conf.num_temp_gprs = 4;
1014	sq_conf.num_gs_gprs = 31;
1015	sq_conf.num_es_gprs = 31;
1016	sq_conf.num_hs_gprs = 23;
1017	sq_conf.num_ls_gprs = 23;
1018	sq_conf.num_ps_threads = 128;
1019	sq_conf.num_vs_threads = 20;
1020	sq_conf.num_gs_threads = 20;
1021	sq_conf.num_es_threads = 20;
1022	sq_conf.num_hs_threads = 20;
1023	sq_conf.num_ls_threads = 20;
1024	sq_conf.num_ps_stack_entries = 42;
1025	sq_conf.num_vs_stack_entries = 42;
1026	sq_conf.num_gs_stack_entries = 42;
1027	sq_conf.num_es_stack_entries = 42;
1028	sq_conf.num_hs_stack_entries = 42;
1029	sq_conf.num_ls_stack_entries = 42;
1030	break;
1031    case CHIP_FAMILY_JUNIPER:
1032	sq_conf.num_ps_gprs = 93;
1033	sq_conf.num_vs_gprs = 46;
1034	sq_conf.num_temp_gprs = 4;
1035	sq_conf.num_gs_gprs = 31;
1036	sq_conf.num_es_gprs = 31;
1037	sq_conf.num_hs_gprs = 23;
1038	sq_conf.num_ls_gprs = 23;
1039	sq_conf.num_ps_threads = 128;
1040	sq_conf.num_vs_threads = 20;
1041	sq_conf.num_gs_threads = 20;
1042	sq_conf.num_es_threads = 20;
1043	sq_conf.num_hs_threads = 20;
1044	sq_conf.num_ls_threads = 20;
1045	sq_conf.num_ps_stack_entries = 85;
1046	sq_conf.num_vs_stack_entries = 85;
1047	sq_conf.num_gs_stack_entries = 85;
1048	sq_conf.num_es_stack_entries = 85;
1049	sq_conf.num_hs_stack_entries = 85;
1050	sq_conf.num_ls_stack_entries = 85;
1051	break;
1052    case CHIP_FAMILY_CYPRESS:
1053    case CHIP_FAMILY_HEMLOCK:
1054	sq_conf.num_ps_gprs = 93;
1055	sq_conf.num_vs_gprs = 46;
1056	sq_conf.num_temp_gprs = 4;
1057	sq_conf.num_gs_gprs = 31;
1058	sq_conf.num_es_gprs = 31;
1059	sq_conf.num_hs_gprs = 23;
1060	sq_conf.num_ls_gprs = 23;
1061	sq_conf.num_ps_threads = 128;
1062	sq_conf.num_vs_threads = 20;
1063	sq_conf.num_gs_threads = 20;
1064	sq_conf.num_es_threads = 20;
1065	sq_conf.num_hs_threads = 20;
1066	sq_conf.num_ls_threads = 20;
1067	sq_conf.num_ps_stack_entries = 85;
1068	sq_conf.num_vs_stack_entries = 85;
1069	sq_conf.num_gs_stack_entries = 85;
1070	sq_conf.num_es_stack_entries = 85;
1071	sq_conf.num_hs_stack_entries = 85;
1072	sq_conf.num_ls_stack_entries = 85;
1073	break;
1074    case CHIP_FAMILY_PALM:
1075	sq_conf.num_ps_gprs = 93;
1076	sq_conf.num_vs_gprs = 46;
1077	sq_conf.num_temp_gprs = 4;
1078	sq_conf.num_gs_gprs = 31;
1079	sq_conf.num_es_gprs = 31;
1080	sq_conf.num_hs_gprs = 23;
1081	sq_conf.num_ls_gprs = 23;
1082	sq_conf.num_ps_threads = 96;
1083	sq_conf.num_vs_threads = 16;
1084	sq_conf.num_gs_threads = 16;
1085	sq_conf.num_es_threads = 16;
1086	sq_conf.num_hs_threads = 16;
1087	sq_conf.num_ls_threads = 16;
1088	sq_conf.num_ps_stack_entries = 42;
1089	sq_conf.num_vs_stack_entries = 42;
1090	sq_conf.num_gs_stack_entries = 42;
1091	sq_conf.num_es_stack_entries = 42;
1092	sq_conf.num_hs_stack_entries = 42;
1093	sq_conf.num_ls_stack_entries = 42;
1094	break;
1095    case CHIP_FAMILY_SUMO:
1096	sq_conf.num_ps_gprs = 93;
1097	sq_conf.num_vs_gprs = 46;
1098	sq_conf.num_temp_gprs = 4;
1099	sq_conf.num_gs_gprs = 31;
1100	sq_conf.num_es_gprs = 31;
1101	sq_conf.num_hs_gprs = 23;
1102	sq_conf.num_ls_gprs = 23;
1103	sq_conf.num_ps_threads = 96;
1104	sq_conf.num_vs_threads = 25;
1105	sq_conf.num_gs_threads = 25;
1106	sq_conf.num_es_threads = 25;
1107	sq_conf.num_hs_threads = 25;
1108	sq_conf.num_ls_threads = 25;
1109	sq_conf.num_ps_stack_entries = 42;
1110	sq_conf.num_vs_stack_entries = 42;
1111	sq_conf.num_gs_stack_entries = 42;
1112	sq_conf.num_es_stack_entries = 42;
1113	sq_conf.num_hs_stack_entries = 42;
1114	sq_conf.num_ls_stack_entries = 42;
1115	break;
1116    case CHIP_FAMILY_SUMO2:
1117	sq_conf.num_ps_gprs = 93;
1118	sq_conf.num_vs_gprs = 46;
1119	sq_conf.num_temp_gprs = 4;
1120	sq_conf.num_gs_gprs = 31;
1121	sq_conf.num_es_gprs = 31;
1122	sq_conf.num_hs_gprs = 23;
1123	sq_conf.num_ls_gprs = 23;
1124	sq_conf.num_ps_threads = 96;
1125	sq_conf.num_vs_threads = 25;
1126	sq_conf.num_gs_threads = 25;
1127	sq_conf.num_es_threads = 25;
1128	sq_conf.num_hs_threads = 25;
1129	sq_conf.num_ls_threads = 25;
1130	sq_conf.num_ps_stack_entries = 85;
1131	sq_conf.num_vs_stack_entries = 85;
1132	sq_conf.num_gs_stack_entries = 85;
1133	sq_conf.num_es_stack_entries = 85;
1134	sq_conf.num_hs_stack_entries = 85;
1135	sq_conf.num_ls_stack_entries = 85;
1136	break;
1137    case CHIP_FAMILY_BARTS:
1138	sq_conf.num_ps_gprs = 93;
1139	sq_conf.num_vs_gprs = 46;
1140	sq_conf.num_temp_gprs = 4;
1141	sq_conf.num_gs_gprs = 31;
1142	sq_conf.num_es_gprs = 31;
1143	sq_conf.num_hs_gprs = 23;
1144	sq_conf.num_ls_gprs = 23;
1145	sq_conf.num_ps_threads = 128;
1146	sq_conf.num_vs_threads = 20;
1147	sq_conf.num_gs_threads = 20;
1148	sq_conf.num_es_threads = 20;
1149	sq_conf.num_hs_threads = 20;
1150	sq_conf.num_ls_threads = 20;
1151	sq_conf.num_ps_stack_entries = 85;
1152	sq_conf.num_vs_stack_entries = 85;
1153	sq_conf.num_gs_stack_entries = 85;
1154	sq_conf.num_es_stack_entries = 85;
1155	sq_conf.num_hs_stack_entries = 85;
1156	sq_conf.num_ls_stack_entries = 85;
1157	break;
1158    case CHIP_FAMILY_TURKS:
1159	sq_conf.num_ps_gprs = 93;
1160	sq_conf.num_vs_gprs = 46;
1161	sq_conf.num_temp_gprs = 4;
1162	sq_conf.num_gs_gprs = 31;
1163	sq_conf.num_es_gprs = 31;
1164	sq_conf.num_hs_gprs = 23;
1165	sq_conf.num_ls_gprs = 23;
1166	sq_conf.num_ps_threads = 128;
1167	sq_conf.num_vs_threads = 20;
1168	sq_conf.num_gs_threads = 20;
1169	sq_conf.num_es_threads = 20;
1170	sq_conf.num_hs_threads = 20;
1171	sq_conf.num_ls_threads = 20;
1172	sq_conf.num_ps_stack_entries = 42;
1173	sq_conf.num_vs_stack_entries = 42;
1174	sq_conf.num_gs_stack_entries = 42;
1175	sq_conf.num_es_stack_entries = 42;
1176	sq_conf.num_hs_stack_entries = 42;
1177	sq_conf.num_ls_stack_entries = 42;
1178	break;
1179    case CHIP_FAMILY_CAICOS:
1180	sq_conf.num_ps_gprs = 93;
1181	sq_conf.num_vs_gprs = 46;
1182	sq_conf.num_temp_gprs = 4;
1183	sq_conf.num_gs_gprs = 31;
1184	sq_conf.num_es_gprs = 31;
1185	sq_conf.num_hs_gprs = 23;
1186	sq_conf.num_ls_gprs = 23;
1187	sq_conf.num_ps_threads = 128;
1188	sq_conf.num_vs_threads = 10;
1189	sq_conf.num_gs_threads = 10;
1190	sq_conf.num_es_threads = 10;
1191	sq_conf.num_hs_threads = 10;
1192	sq_conf.num_ls_threads = 10;
1193	sq_conf.num_ps_stack_entries = 42;
1194	sq_conf.num_vs_stack_entries = 42;
1195	sq_conf.num_gs_stack_entries = 42;
1196	sq_conf.num_es_stack_entries = 42;
1197	sq_conf.num_hs_stack_entries = 42;
1198	sq_conf.num_ls_stack_entries = 42;
1199	break;
1200    }
1201
1202    evergreen_sq_setup(pScrn, &sq_conf);
1203
1204    BEGIN_BATCH(27);
1205    EREG(SQ_LDS_ALLOC_PS, 0);
1206    EREG(SQ_LDS_RESOURCE_MGMT, 0x10001000);
1207    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
1208
1209    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
1210    E32(0);
1211    E32(0);
1212    E32(0);
1213    E32(0);
1214    E32(0);
1215    E32(0);
1216
1217    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
1218    E32(0);
1219    E32(0);
1220    E32(0);
1221    E32(0);
1222
1223    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
1224    E32(0);
1225    E32(0);
1226    END_BATCH();
1227
1228    /* DB */
1229    BEGIN_BATCH(3 + 2);
1230    EREG(DB_Z_INFO,                           0);
1231    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1232    END_BATCH();
1233
1234    BEGIN_BATCH(3 + 2);
1235    EREG(DB_STENCIL_INFO,                     0);
1236    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1237    END_BATCH();
1238
1239    BEGIN_BATCH(3 + 2);
1240    EREG(DB_HTILE_DATA_BASE,                    0);
1241    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1242    END_BATCH();
1243
1244    BEGIN_BATCH(49);
1245    EREG(DB_DEPTH_CONTROL,                    0);
1246
1247    PACK0(PA_SC_VPORT_ZMIN_0, 2);
1248    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
1249    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
1250
1251    PACK0(DB_RENDER_CONTROL, 5);
1252    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
1253    E32(0); // DB_COUNT_CONTROL
1254    E32(0); // DB_DEPTH_VIEW
1255    E32(0x2a); // DB_RENDER_OVERRIDE
1256    E32(0); // DB_RENDER_OVERRIDE2
1257
1258    PACK0(DB_STENCIL_CLEAR, 2);
1259    E32(0); // DB_STENCIL_CLEAR
1260    E32(0); // DB_DEPTH_CLEAR
1261
1262    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
1263					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
1264					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
1265					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
1266
1267    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
1268			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1269
1270    // SX
1271    EREG(SX_MISC,               0);
1272
1273    // CB
1274    PACK0(SX_ALPHA_TEST_CONTROL, 5);
1275    E32(0); // SX_ALPHA_TEST_CONTROL
1276    E32(0x00000000); //CB_BLEND_RED
1277    E32(0x00000000); //CB_BLEND_GREEN
1278    E32(0x00000000); //CB_BLEND_BLUE
1279    E32(0x00000000); //CB_BLEND_ALPHA
1280
1281    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
1282
1283    // SC
1284    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1285					       (0 << WINDOW_Y_OFFSET_shift)));
1286    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1287    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
1288    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
1289    END_BATCH();
1290
1291    /* clip boolean is set to always visible -> doesn't matter */
1292    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1293	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
1294
1295    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1296	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
1297
1298    BEGIN_BATCH(57);
1299    PACK0(PA_SC_MODE_CNTL_0, 2);
1300    E32(0); // PA_SC_MODE_CNTL_0
1301    E32(0); // PA_SC_MODE_CNTL_1
1302
1303    PACK0(PA_SC_LINE_CNTL, 16);
1304    E32(0); // PA_SC_LINE_CNTL
1305    E32(0); // PA_SC_AA_CONFIG
1306    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
1307	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
1308    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1309    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1310    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1311    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1312    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
1313    E32(0);
1314    E32(0);
1315    E32(0);
1316    E32(0);
1317    E32(0);
1318    E32(0);
1319    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
1320    E32(0xFFFFFFFF); // PA_SC_AA_MASK
1321
1322    // CL
1323    PACK0(PA_CL_CLIP_CNTL, 8);
1324    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1325    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
1326    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
1327    E32(0); // PA_CL_VS_OUT_CNTL
1328    E32(0); // PA_CL_NANINF_CNTL
1329    E32(0); // PA_SU_LINE_STIPPLE_CNTL
1330    E32(0); // PA_SU_LINE_STIPPLE_SCALE
1331    E32(0); // PA_SU_PRIM_FILTER_CNTL
1332
1333    // SU
1334    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1335    E32(0);
1336    E32(0);
1337    E32(0);
1338    E32(0);
1339    E32(0);
1340    E32(0);
1341
1342    /* src = semantic id 0; mask = semantic id 1 */
1343    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1344			   (1 << SEMANTIC_1_shift)));
1345    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1346    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1347    E32(((0    << SEMANTIC_shift)	|
1348	 (0x01 << DEFAULT_VAL_shift)));
1349    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1350    E32(((1    << SEMANTIC_shift)	|
1351	 (0x01 << DEFAULT_VAL_shift)));
1352
1353    PACK0(SPI_INPUT_Z, 8);
1354    E32(0); // SPI_INPUT_Z
1355    E32(0); // SPI_FOG_CNTL
1356    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
1357    E32(0); // SPI_PS_IN_CONTROL_2
1358    E32(0);
1359    E32(0);
1360    E32(0);
1361    E32(0);
1362    END_BATCH();
1363
1364    // clear FS
1365    fs_conf.bo = accel_state->shaders_bo;
1366    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1367
1368    // VGT
1369    BEGIN_BATCH(46);
1370
1371    PACK0(VGT_MAX_VTX_INDX, 4);
1372    E32(0xffffff);
1373    E32(0);
1374    E32(0);
1375    E32(0);
1376
1377    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1378    E32(0);
1379    E32(0);
1380
1381    PACK0(VGT_REUSE_OFF, 2);
1382    E32(0);
1383    E32(0);
1384
1385    PACK0(PA_SU_POINT_SIZE, 17);
1386    E32(0); // PA_SU_POINT_SIZE
1387    E32(0); // PA_SU_POINT_MINMAX
1388    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1389    E32(0); // PA_SC_LINE_STIPPLE
1390    E32(0); // VGT_OUTPUT_PATH_CNTL
1391    E32(0); // VGT_HOS_CNTL
1392    E32(0);
1393    E32(0);
1394    E32(0);
1395    E32(0);
1396    E32(0);
1397    E32(0);
1398    E32(0);
1399    E32(0);
1400    E32(0);
1401    E32(0);
1402    E32(0); // VGT_GS_MODE
1403
1404    EREG(VGT_PRIMITIVEID_EN,                  0);
1405    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1406    EREG(VGT_SHADER_STAGES_EN,          0);
1407
1408    PACK0(VGT_STRMOUT_CONFIG, 2);
1409    E32(0);
1410    E32(0);
1411    END_BATCH();
1412}
1413
1414
1415/*
1416 * Commands
1417 */
1418
1419void
1420evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1421{
1422    RADEONInfoPtr info = RADEONPTR(pScrn);
1423
1424    BEGIN_BATCH(10);
1425    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1426    PACK3(IT_INDEX_TYPE, 1);
1427#if X_BYTE_ORDER == X_BIG_ENDIAN
1428    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1429#else
1430    E32(draw_conf->index_type);
1431#endif
1432    PACK3(IT_NUM_INSTANCES, 1);
1433    E32(draw_conf->num_instances);
1434    PACK3(IT_DRAW_INDEX_AUTO, 2);
1435    E32(draw_conf->num_indices);
1436    E32(draw_conf->vgt_draw_initiator);
1437    END_BATCH();
1438}
1439
1440void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1441{
1442    RADEONInfoPtr info = RADEONPTR(pScrn);
1443    struct radeon_accel_state *accel_state = info->accel_state;
1444    draw_config_t   draw_conf;
1445    vtx_resource_t  vtx_res;
1446
1447    if (accel_state->vbo.vb_start_op == -1)
1448      return;
1449
1450    CLEAR (draw_conf);
1451    CLEAR (vtx_res);
1452
1453    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1454	radeon_ib_discard(pScrn);
1455	radeon_cs_flush_indirect(pScrn);
1456	return;
1457    }
1458
1459    /* Vertex buffer setup */
1460    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1461    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
1462    vtx_res.vtx_size_dw     = vtx_size / 4;
1463    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1464    vtx_res.vb_addr         = accel_state->vbo.vb_start_op;
1465    vtx_res.bo              = accel_state->vbo.vb_bo;
1466    vtx_res.dst_sel_x       = SQ_SEL_X;
1467    vtx_res.dst_sel_y       = SQ_SEL_Y;
1468    vtx_res.dst_sel_z       = SQ_SEL_Z;
1469    vtx_res.dst_sel_w       = SQ_SEL_W;
1470#if X_BYTE_ORDER == X_BIG_ENDIAN
1471    vtx_res.endian          = SQ_ENDIAN_8IN32;
1472#endif
1473    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1474
1475    /* Draw */
1476    draw_conf.prim_type          = DI_PT_RECTLIST;
1477    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1478    draw_conf.num_instances      = 1;
1479    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1480    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1481
1482    evergreen_draw_auto(pScrn, &draw_conf);
1483
1484    /* sync dst surface */
1485    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1486				  accel_state->dst_size, 0,
1487				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1488
1489    accel_state->vbo.vb_start_op = -1;
1490    accel_state->cbuf.vb_start_op = -1;
1491    accel_state->ib_reset_op = 0;
1492
1493}
1494
1495