r6xx_accel.c revision 69d0ef43
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Alex Deucher <alexander.deucher@amd.com>
 *          Matthias Hopf <mhopf@suse.de>
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "xf86.h"

#include <errno.h>

#include "radeon.h"
#include "r600_shader.h"
#include "radeon_reg.h"
#include "r600_reg.h"
#include "r600_state.h"

#include "radeon_drm.h"
#include "radeon_vbo.h"
#include "radeon_exa_shared.h"

/* we try to batch operations together under KMS -
   but it doesn't work yet without misrendering */
#define KMS_MULTI_OP 1

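/* A note on the batch macros used throughout this file (defined in the
 * shared headers included above, e.g. radeon_exa_shared.h): BEGIN_BATCH(n)
 * reserves space for n dwords, E32()/EFLOAT() emit a single dword,
 * EREG() emits a single-register write, PACK0() starts a multi-register
 * write, PACK3() starts a type-3 CP packet, and RELOC_BATCH() adds a
 * buffer-object relocation for the KMS checker.  The dword counts passed
 * to BEGIN_BATCH() below account for the packet headers and relocations.
 * (Rough summary only; see the macro definitions for the details.)
 */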
/* Flush the indirect buffer to the kernel for submission to the card */
void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr  info = RADEONPTR(pScrn);
    drmBufPtr          buffer = ib;
    int                start  = 0;
    drm_radeon_indirect_t  indirect;

#if defined(XF86DRM_MODE)
    if (info->cs) {
        radeon_cs_flush_indirect(pScrn);
        return;
    }
#endif

    if (!buffer) return;

    //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
    //       buffer->idx);

    while (buffer->used & 0x3c) {
        BEGIN_BATCH(1);
        E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */
        END_BATCH();
    }

    //ErrorF("buffer bytes: %d\n", buffer->used);

    indirect.idx     = buffer->idx;
    indirect.start   = start;
    indirect.end     = buffer->used;
    indirect.discard = 1;

    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
                        &indirect, sizeof(drm_radeon_indirect_t));
}

void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
{
#if defined(XF86DRM_MODE)
    RADEONInfoPtr info = RADEONPTR(pScrn);
    if (info->cs) {
        radeon_ib_discard(pScrn);
    }
#endif
    if (!ib) return;

    ib->used = 0;
    R600CPFlushIndirect(pScrn, ib);
}

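/* Emit a CACHE_FLUSH_AND_INV event and then stall the CP until the 3D
 * engine reports idle with clean caches, so that later consumers (CPU or
 * other engines) see coherent results.
 */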
void
wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    // flush caches, don't generate timestamp
    BEGIN_BATCH(5);
    PACK3(ib, IT_EVENT_WRITE, 1);
    E32(ib, CACHE_FLUSH_AND_INV_EVENT);
    // wait for 3D idle clean
    EREG(ib, WAIT_UNTIL,                          (WAIT_3D_IDLE_bit |
                                                   WAIT_3D_IDLECLEAN_bit));
    END_BATCH();
}

void
wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(3);
    EREG(ib, WAIT_UNTIL,                          WAIT_3D_IDLE_bit);
    END_BATCH();
}

void
start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    if (info->ChipFamily < CHIP_FAMILY_RV770) {
        BEGIN_BATCH(5);
        PACK3(ib, IT_START_3D_CMDBUF, 1);
        E32(ib, 0);
    } else
        BEGIN_BATCH(3);

    PACK3(ib, IT_CONTEXT_CONTROL, 2);
    E32(ib, 0x80000000);
    E32(ib, 0x80000000);
    END_BATCH();
}

/*
 * Setup of functional groups
 */

// asic stack/thread/gpr limits - need to query the drm
static void
sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
{
    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
    RADEONInfoPtr info = RADEONPTR(pScrn);

    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
        (info->ChipFamily == CHIP_FAMILY_RV620) ||
        (info->ChipFamily == CHIP_FAMILY_RS780) ||
        (info->ChipFamily == CHIP_FAMILY_RS880) ||
        (info->ChipFamily == CHIP_FAMILY_RV710))
        sq_config = 0;                                          // no VC
    else
        sq_config = VC_ENABLE_bit;

    sq_config |= (DX9_CONSTS_bit |
                  ALU_INST_PREFER_VECTOR_bit |
                  (sq_conf->ps_prio << PS_PRIO_shift) |
                  (sq_conf->vs_prio << VS_PRIO_shift) |
                  (sq_conf->gs_prio << GS_PRIO_shift) |
                  (sq_conf->es_prio << ES_PRIO_shift));

    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
                              (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
                              (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
                              (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));

    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
                               (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
                               (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
                               (sq_conf->num_es_threads << NUM_ES_THREADS_shift));

    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
                                (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));

    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
                                (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));

    BEGIN_BATCH(8);
    PACK0(ib, SQ_CONFIG, 6);
    E32(ib, sq_config);
    E32(ib, sq_gpr_resource_mgmt_1);
    E32(ib, sq_gpr_resource_mgmt_2);
    E32(ib, sq_thread_resource_mgmt);
    E32(ib, sq_stack_resource_mgmt_1);
    E32(ib, sq_stack_resource_mgmt_2);
    END_BATCH();
}

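/* Program the CB_COLOR0_* block (offset by 4 * cb_conf->id for other MRTs)
 * for the destination surface.  PITCH_TILE_MAX and SLICE_TILE_MAX are
 * encoded in units of 8 pixels and 64 pixels respectively, minus one,
 * hence pitch = (w / 8) - 1 and slice = (w * aligned_h / 64) - 1 below;
 * as noted in the code this is only valid for ARRAY_LINEAR_GENERAL
 * surfaces.
 */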
void
set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain)
{
    uint32_t cb_color_info;
    int pitch, slice, h;
    RADEONInfoPtr info = RADEONPTR(pScrn);

    cb_color_info = ((cb_conf->endian      << ENDIAN_shift) |
                     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift) |
                     (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift) |
                     (cb_conf->number_type << NUMBER_TYPE_shift) |
                     (cb_conf->comp_swap   << COMP_SWAP_shift) |
                     (cb_conf->tile_mode   << CB_COLOR0_INFO__TILE_MODE_shift));
    if (cb_conf->read_size)
        cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
    if (cb_conf->blend_clamp)
        cb_color_info |= BLEND_CLAMP_bit;
    if (cb_conf->clear_color)
        cb_color_info |= CLEAR_COLOR_bit;
    if (cb_conf->blend_bypass)
        cb_color_info |= BLEND_BYPASS_bit;
    if (cb_conf->blend_float32)
        cb_color_info |= BLEND_FLOAT32_bit;
    if (cb_conf->simple_float)
        cb_color_info |= SIMPLE_FLOAT_bit;
    if (cb_conf->round_mode)
        cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
    if (cb_conf->tile_compact)
        cb_color_info |= TILE_COMPACT_bit;
    if (cb_conf->source_format)
        cb_color_info |= SOURCE_FORMAT_bit;

    pitch = (cb_conf->w / 8) - 1;
    h = RADEON_ALIGN(cb_conf->h, 8);
    slice = ((cb_conf->w * h) / 64) - 1;

    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
    RELOC_BATCH(cb_conf->bo, 0, domain);
    END_BATCH();

    // rv6xx workaround
    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
        (info->ChipFamily < CHIP_FAMILY_RV770)) {
        BEGIN_BATCH(2);
        PACK3(ib, IT_SURFACE_BASE_UPDATE, 1);
        E32(ib, (2 << cb_conf->id));
        END_BATCH();
    }
    /* Point the CMASK & FMASK buffers at the color buffer offset; since we
     * don't use them this shouldn't cause any issue, and we then have a
     * valid cmd stream.
     */
    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0     >> 8));       // CMASK per-tile data base/256
    RELOC_BATCH(cb_conf->bo, 0, domain);
    END_BATCH();
    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0     >> 8));       // FMASK per-tile data base/256
    RELOC_BATCH(cb_conf->bo, 0, domain);
    END_BATCH();
    BEGIN_BATCH(9);
    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
    EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) |
                                                    (slice << SLICE_TILE_MAX_shift)));
    EREG(ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0    << SLICE_START_shift) |
                                                    (0    << SLICE_MAX_shift)));
    EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0    << CMASK_BLOCK_MAX_shift) |
                                                    (0    << FMASK_TILE_MAX_shift)));
    END_BATCH();

    BEGIN_BATCH(3 + 2);
    EREG(ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
    RELOC_BATCH(cb_conf->bo, 0, domain);
    END_BATCH();
}

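/* Emit a SURFACE_SYNC packet: flush/invalidate the caches selected by
 * sync_type (CB, DB, TC, VC, SH, ...) for the given memory range.  The CP
 * expects the size in 256-byte units and the base address shifted right
 * by 8, hence ((size + 255) >> 8) and (mc_addr >> 8) below; a size of
 * 0xffffffff means "full range".
 */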
static void
cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr,
                    struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t cp_coher_size;

    if (size == 0xffffffff)
        cp_coher_size = 0xffffffff;
    else
        cp_coher_size = ((size + 255) >> 8);

    BEGIN_BATCH(5 + 2);
    PACK3(ib, IT_SURFACE_SYNC, 4);
    E32(ib, sync_type);
    E32(ib, cp_coher_size);
    E32(ib, (mc_addr >> 8));
    E32(ib, 10); /* poll interval */
    RELOC_BATCH(bo, rdomains, wdomain);
    END_BATCH();
}

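/* The wait-for-vline below is used to avoid tearing: the CP stalls until
 * the CRTC scanline is within the given range, so the blit lands while
 * scanout is away from the destination area.  It is only emitted when
 * rendering to the front buffer / screen pixmap.
 */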
300void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
301			xf86CrtcPtr crtc, int start, int stop)
302{
303    RADEONInfoPtr  info = RADEONPTR(pScrn);
304    uint32_t offset;
305
306    if (!crtc)
307        return;
308
309    if (stop < start)
310        return;
311
312    if (!crtc->enabled)
313        return;
314
315    if (info->cs) {
316        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
317	    return;
318    } else {
319#ifdef USE_EXA
320	if (info->useEXA)
321	    offset = exaGetPixmapOffset(pPix);
322	else
323#endif
324	    offset = pPix->devPrivate.ptr - info->FB;
325
326	/* if drawing to front buffer */
327	if (offset != 0)
328	    return;
329    }
330
331    start = max(start, 0);
332    stop = min(stop, crtc->mode.VDisplay);
333
334    if (start > crtc->mode.VDisplay)
335        return;
336
337#if defined(XF86DRM_MODE)
338    if (info->cs) {
339	drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
340
341	BEGIN_BATCH(11);
342	/* set the VLINE range */
343	EREG(ib, AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */
344	     (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
345	     (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
346
347	/* tell the CP to poll the VLINE state register */
348	PACK3(ib, IT_WAIT_REG_MEM, 6);
349	E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
350	E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS));
351	E32(ib, 0);
352	E32(ib, 0);                          // Ref value
353	E32(ib, AVIVO_D1MODE_VLINE_STAT);    // Mask
354	E32(ib, 10);                         // Wait interval
355	/* add crtc reloc */
356	PACK3(ib, IT_NOP, 1);
357	E32(ib, drmmode_crtc->mode_crtc->crtc_id);
358	END_BATCH();
359    } else
360#endif
361    {
362	RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
363
364	BEGIN_BATCH(9);
365	/* set the VLINE range */
366	EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset,
367	     (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
368	     (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
369
370	/* tell the CP to poll the VLINE state register */
371	PACK3(ib, IT_WAIT_REG_MEM, 6);
372	E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
373	E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset));
374	E32(ib, 0);
375	E32(ib, 0);                          // Ref value
376	E32(ib, AVIVO_D1MODE_VLINE_STAT);    // Mask
377	E32(ib, 10);                         // Wait interval
378	END_BATCH();
379    }
380}
381
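/* fs_setup()/vs_setup()/ps_setup() below upload the per-stage shader state:
 * SQ_PGM_START_* takes the shader GPU address >> 8 (so shader buffers must
 * be 256-byte aligned) and SQ_PGM_RESOURCES_* carries the GPR/stack sizing
 * plus the misc bits from shader_config_t.
 */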
void
fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_pgm_resources;

    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
                        (fs_conf->stack_size << STACK_SIZE_shift));

    if (fs_conf->dx10_clamp)
        sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;

    BEGIN_BATCH(3 + 2);
    EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
    RELOC_BATCH(fs_conf->bo, domain, 0);
    END_BATCH();

    BEGIN_BATCH(6);
    EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources);
    EREG(ib, SQ_PGM_CF_OFFSET_FS, 0);
    END_BATCH();
}

void
vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_pgm_resources;

    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
                        (vs_conf->stack_size << STACK_SIZE_shift));

    if (vs_conf->dx10_clamp)
        sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
    if (vs_conf->fetch_cache_lines)
        sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
    if (vs_conf->uncached_first_inst)
        sq_pgm_resources |= UNCACHED_FIRST_INST_bit;

    /* flush SQ cache */
    cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
                        vs_conf->shader_size, vs_conf->shader_addr,
                        vs_conf->bo, domain, 0);

    BEGIN_BATCH(3 + 2);
    EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
    RELOC_BATCH(vs_conf->bo, domain, 0);
    END_BATCH();

    BEGIN_BATCH(6);
    EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources);
    EREG(ib, SQ_PGM_CF_OFFSET_VS, 0);
    END_BATCH();
}

void
ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_pgm_resources;

    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
                        (ps_conf->stack_size << STACK_SIZE_shift));

    if (ps_conf->dx10_clamp)
        sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
    if (ps_conf->fetch_cache_lines)
        sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
    if (ps_conf->uncached_first_inst)
        sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
    if (ps_conf->clamp_consts)
        sq_pgm_resources |= CLAMP_CONSTS_bit;

    /* flush SQ cache */
    cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
                        ps_conf->shader_size, ps_conf->shader_addr,
                        ps_conf->bo, domain, 0);

    BEGIN_BATCH(3 + 2);
    EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
    RELOC_BATCH(ps_conf->bo, domain, 0);
    END_BATCH();

    BEGIN_BATCH(9);
    EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources);
    EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
    EREG(ib, SQ_PGM_CF_OFFSET_PS, 0);
    END_BATCH();
}

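/* ALU constants are uploaded as float4 vectors: 'count' is the number of
 * 4-component constants starting at slot 'offset', so the register count
 * below is count * (SQ_ALU_CONSTANT_offset >> 2), i.e. 4 dwords per
 * constant.
 */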
void
set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    int i;
    const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2);

    BEGIN_BATCH(2 + countreg);
    PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg);
    for (i = 0; i < countreg; i++)
        EFLOAT(ib, const_buf[i]);
    END_BATCH();
}

void
set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    /* bool register order is: ps, vs, gs; one register each,
     * 1 bit per bool; 32 bools each for ps, vs, gs.
     */
    BEGIN_BATCH(3);
    EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
    END_BATCH();
}

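/* Set up a vertex-buffer resource: a 7-dword SQ_VTX_CONSTANT block giving
 * the base address, size in bytes minus one, stride in bytes
 * (vtx_size_dw << 2) and format/clamp bits.  Chips without a vertex cache
 * (RV610/RV620/RS780/RS880/RV710) fetch vertices through the texture
 * cache, so the flush before the upload uses TC_ACTION_ENA on those and
 * VC_ACTION_ENA on the rest.
 */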
static void
set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    uint32_t sq_vtx_constant_word2;

    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
                             ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
                             (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
                             (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
                             (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
    if (res->clamp_x)
        sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;

    if (res->format_comp_all)
        sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;

    if (res->srf_mode_all)
        sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;

    /* flush vertex cache */
    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
        (info->ChipFamily == CHIP_FAMILY_RV620) ||
        (info->ChipFamily == CHIP_FAMILY_RS780) ||
        (info->ChipFamily == CHIP_FAMILY_RS880) ||
        (info->ChipFamily == CHIP_FAMILY_RV710))
        cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
                            accel_state->vb_offset, accel_state->vb_mc_addr,
                            res->bo,
                            domain, 0);
    else
        cp_set_surface_sync(pScrn, ib, VC_ACTION_ENA_bit,
                            accel_state->vb_offset, accel_state->vb_mc_addr,
                            res->bo,
                            domain, 0);

    BEGIN_BATCH(9 + 2);
    PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
    E32(ib, res->vb_addr & 0xffffffff);                         // 0: BASE_ADDRESS
    E32(ib, (res->vtx_num_entries << 2) - 1);                   // 1: SIZE
    E32(ib, sq_vtx_constant_word2);                             // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
    E32(ib, res->mem_req_size << MEM_REQUEST_SIZE_shift);       // 3: MEM_REQUEST_SIZE ?!?
    E32(ib, 0);                                                 // 4: n/a
    E32(ib, 0);                                                 // 5: n/a
    E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift);    // 6: TYPE
    RELOC_BATCH(res->bo, domain, 0);
    END_BATCH();
}

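/* Set up a texture resource: the 7-dword SQ_TEX_RESOURCE block encodes the
 * pitch in 8-texel units minus one, width/height/depth minus one, the base
 * and mip addresses >> 8, the component swizzles and sampling flags.  The
 * texture cache is flushed for the surface first.
 */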
void
set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
    uint32_t sq_tex_resource_word5, sq_tex_resource_word6;

    sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
                             (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));

    if (tex_res->w)
        sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
                                  ((tex_res->w - 1) << TEX_WIDTH_shift));

    if (tex_res->tile_type)
        sq_tex_resource_word0 |= TILE_TYPE_bit;

    sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);

    if (tex_res->h)
        sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
    if (tex_res->depth)
        sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);

    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
                             (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
                             (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
                             (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
                             (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
                             (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
                             (tex_res->request_size << REQUEST_SIZE_shift) |
                             (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
                             (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
                             (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
                             (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
                             (tex_res->base_level << BASE_LEVEL_shift));

    if (tex_res->srf_mode_all)
        sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
    if (tex_res->force_degamma)
        sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;

    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
                             (tex_res->base_array << BASE_ARRAY_shift) |
                             (tex_res->last_array << LAST_ARRAY_shift));

    sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
                             (tex_res->perf_modulation << PERF_MODULATION_shift) |
                             (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));

    if (tex_res->interlaced)
        sq_tex_resource_word6 |= INTERLACED_bit;

    /* flush texture cache */
    cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
                        tex_res->size, tex_res->base,
                        tex_res->bo, domain, 0);

    BEGIN_BATCH(9 + 4);
    PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
    E32(ib, sq_tex_resource_word0);
    E32(ib, sq_tex_resource_word1);
    E32(ib, ((tex_res->base) >> 8));
    E32(ib, ((tex_res->mip_base) >> 8));
    E32(ib, sq_tex_resource_word4);
    E32(ib, sq_tex_resource_word5);
    E32(ib, sq_tex_resource_word6);
    RELOC_BATCH(tex_res->bo, domain, 0);
    RELOC_BATCH(tex_res->mip_bo, domain, 0);
    END_BATCH();
}

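/* Program one of the SQ_TEX_SAMPLER_WORD triplets: wrap/clamp modes and
 * filters in word0, LOD range and bias in word1, and the remaining
 * behaviour bits in word2.
 */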
void
set_tex_sampler(ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;

    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) |
                            (s->clamp_y       << CLAMP_Y_shift) |
                            (s->clamp_z       << CLAMP_Z_shift) |
                            (s->xy_mag_filter << XY_MAG_FILTER_shift) |
                            (s->xy_min_filter << XY_MIN_FILTER_shift) |
                            (s->z_filter      << Z_FILTER_shift) |
                            (s->mip_filter    << MIP_FILTER_shift) |
                            (s->border_color  << BORDER_COLOR_TYPE_shift) |
                            (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) |
                            (s->chroma_key    << CHROMA_KEY_shift));
    if (s->point_sampling_clamp)
        sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
    if (s->tex_array_override)
        sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
    if (s->lod_uses_minor_axis)
        sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;

    sq_tex_sampler_word1 = ((s->min_lod       << MIN_LOD_shift) |
                            (s->max_lod       << MAX_LOD_shift) |
                            (s->lod_bias      << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));

    sq_tex_sampler_word2 = ((s->lod_bias2     << LOD_BIAS_SEC_shift) |
                            (s->perf_mip      << PERF_MIP_shift) |
                            (s->perf_z        << PERF_Z_shift));
    if (s->mc_coord_truncate)
        sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
    if (s->force_degamma)
        sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
    if (s->high_precision_filter)
        sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
    if (s->fetch_4)
        sq_tex_sampler_word2 |= FETCH_4_bit;
    if (s->sample_is_pcf)
        sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
    if (s->type)
        sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;

    BEGIN_BATCH(5);
    PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
    E32(ib, sq_tex_sampler_word0);
    E32(ib, sq_tex_sampler_word1);
    E32(ib, sq_tex_sampler_word2);
    END_BATCH();
}

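/* The helpers below each program one TL/BR register pair of the scissor
 * hierarchy (screen, viewport, generic, window) or a clip rectangle.
 * WINDOW_OFFSET_DISABLE is set where available so the coordinates are
 * treated as absolute rather than window-relative.
 */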
//XXX deal with clip offsets in clip setup
void
set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(4);
    PACK0(ib, PA_SC_SCREEN_SCISSOR_TL, 2);
    E32(ib, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
             (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
    E32(ib, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
             (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
    END_BATCH();
}

void
set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(4);
    PACK0(ib, PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
    E32(ib, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
             (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
             WINDOW_OFFSET_DISABLE_bit));
    E32(ib, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
             (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
    END_BATCH();
}

void
set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(4);
    PACK0(ib, PA_SC_GENERIC_SCISSOR_TL, 2);
    E32(ib, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
             (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
             WINDOW_OFFSET_DISABLE_bit));
    E32(ib, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
             (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift)));
    END_BATCH();
}

void
set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(4);
    PACK0(ib, PA_SC_WINDOW_SCISSOR_TL, 2);
    E32(ib, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
             (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
             WINDOW_OFFSET_DISABLE_bit));
    E32(ib, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
             (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
    END_BATCH();
}

void
set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(4);
    PACK0(ib, PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
    E32(ib, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
             (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
    E32(ib, ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
             (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
    END_BATCH();
}

/*
 * Setup of default state
 */

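/* set_default_state() emits the baseline 3D state once per context (guarded
 * by accel_state->XInited3D): per-family SQ thread/GPR/stack sizing, depth
 * testing disabled, scissors and clip rects opened to 8192x8192, and
 * default VGT/PA/SPI state, so that the per-operation paths only need to
 * emit what actually differs.
 */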
void
set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
{
    tex_resource_t tex_res;
    shader_config_t fs_conf;
    sq_config_t sq_conf;
    int i;
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    if (accel_state->XInited3D)
        return;

    memset(&tex_res, 0, sizeof(tex_resource_t));
    memset(&fs_conf, 0, sizeof(shader_config_t));

    accel_state->XInited3D = TRUE;

    start_3d(pScrn, accel_state->ib);

    // SQ
    sq_conf.ps_prio = 0;
    sq_conf.vs_prio = 1;
    sq_conf.gs_prio = 2;
    sq_conf.es_prio = 3;
    // need to set stack/thread/gpr limits based on the asic
    // for now just set them low enough so any card will work
    // see r600_cp.c in the drm
    switch (info->ChipFamily) {
    case CHIP_FAMILY_R600:
        sq_conf.num_ps_gprs = 192;
        sq_conf.num_vs_gprs = 56;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 136;
        sq_conf.num_vs_threads = 48;
        sq_conf.num_gs_threads = 4;
        sq_conf.num_es_threads = 4;
        sq_conf.num_ps_stack_entries = 128;
        sq_conf.num_vs_stack_entries = 128;
        sq_conf.num_gs_stack_entries = 0;
        sq_conf.num_es_stack_entries = 0;
        break;
    case CHIP_FAMILY_RV630:
    case CHIP_FAMILY_RV635:
        sq_conf.num_ps_gprs = 84;
        sq_conf.num_vs_gprs = 36;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 144;
        sq_conf.num_vs_threads = 40;
        sq_conf.num_gs_threads = 4;
        sq_conf.num_es_threads = 4;
        sq_conf.num_ps_stack_entries = 40;
        sq_conf.num_vs_stack_entries = 40;
        sq_conf.num_gs_stack_entries = 32;
        sq_conf.num_es_stack_entries = 16;
        break;
    case CHIP_FAMILY_RV610:
    case CHIP_FAMILY_RV620:
    case CHIP_FAMILY_RS780:
    case CHIP_FAMILY_RS880:
    default:
        sq_conf.num_ps_gprs = 84;
        sq_conf.num_vs_gprs = 36;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 136;
        sq_conf.num_vs_threads = 48;
        sq_conf.num_gs_threads = 4;
        sq_conf.num_es_threads = 4;
        sq_conf.num_ps_stack_entries = 40;
        sq_conf.num_vs_stack_entries = 40;
        sq_conf.num_gs_stack_entries = 32;
        sq_conf.num_es_stack_entries = 16;
        break;
    case CHIP_FAMILY_RV670:
        sq_conf.num_ps_gprs = 144;
        sq_conf.num_vs_gprs = 40;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 136;
        sq_conf.num_vs_threads = 48;
        sq_conf.num_gs_threads = 4;
        sq_conf.num_es_threads = 4;
        sq_conf.num_ps_stack_entries = 40;
        sq_conf.num_vs_stack_entries = 40;
        sq_conf.num_gs_stack_entries = 32;
        sq_conf.num_es_stack_entries = 16;
        break;
    case CHIP_FAMILY_RV770:
        sq_conf.num_ps_gprs = 192;
        sq_conf.num_vs_gprs = 56;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 188;
        sq_conf.num_vs_threads = 60;
        sq_conf.num_gs_threads = 0;
        sq_conf.num_es_threads = 0;
        sq_conf.num_ps_stack_entries = 256;
        sq_conf.num_vs_stack_entries = 256;
        sq_conf.num_gs_stack_entries = 0;
        sq_conf.num_es_stack_entries = 0;
        break;
    case CHIP_FAMILY_RV730:
    case CHIP_FAMILY_RV740:
        sq_conf.num_ps_gprs = 84;
        sq_conf.num_vs_gprs = 36;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 188;
        sq_conf.num_vs_threads = 60;
        sq_conf.num_gs_threads = 0;
        sq_conf.num_es_threads = 0;
        sq_conf.num_ps_stack_entries = 128;
        sq_conf.num_vs_stack_entries = 128;
        sq_conf.num_gs_stack_entries = 0;
        sq_conf.num_es_stack_entries = 0;
        break;
    case CHIP_FAMILY_RV710:
        sq_conf.num_ps_gprs = 192;
        sq_conf.num_vs_gprs = 56;
        sq_conf.num_temp_gprs = 4;
        sq_conf.num_gs_gprs = 0;
        sq_conf.num_es_gprs = 0;
        sq_conf.num_ps_threads = 144;
        sq_conf.num_vs_threads = 48;
        sq_conf.num_gs_threads = 0;
        sq_conf.num_es_threads = 0;
        sq_conf.num_ps_stack_entries = 128;
        sq_conf.num_vs_stack_entries = 128;
        sq_conf.num_gs_stack_entries = 0;
        sq_conf.num_es_stack_entries = 0;
        break;
    }

    sq_setup(pScrn, ib, &sq_conf);

    /* set fake reloc for unused depth */
    BEGIN_BATCH(3 + 2);
    EREG(ib, DB_DEPTH_INFO, 0);
    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
    END_BATCH();

    BEGIN_BATCH(80);
    if (info->ChipFamily < CHIP_FAMILY_RV770) {
        EREG(ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) |
                               (28 << TD_FIFO_CREDIT_shift)));
        EREG(ib, VC_ENHANCE, 0);
        EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
        EREG(ib, DB_DEBUG, 0x82000000); /* ? */
        EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
                                 (16 << DEPTH_FLUSH_shift) |
                                 (0 << FORCE_SUMMARIZE_shift) |
                                 (4 << DEPTH_PENDING_FREE_shift) |
                                 (16 << DEPTH_CACHELINE_FREE_shift) |
                                 0));
    } else {
        EREG(ib, TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) |
                               (28 << TD_FIFO_CREDIT_shift)));
        EREG(ib, VC_ENHANCE, 0);
        EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
        EREG(ib, DB_DEBUG, 0);
        EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
                                 (16 << DEPTH_FLUSH_shift) |
                                 (0 << FORCE_SUMMARIZE_shift) |
                                 (4 << DEPTH_PENDING_FREE_shift) |
                                 (4 << DEPTH_CACHELINE_FREE_shift) |
                                 0));
    }

    PACK0(ib, SQ_VTX_BASE_VTX_LOC, 2);
    E32(ib, 0);
    E32(ib, 0);

    PACK0(ib, SQ_ESGS_RING_ITEMSIZE, 9);
    E32(ib, 0); // SQ_ESGS_RING_ITEMSIZE
    E32(ib, 0); // SQ_GSVS_RING_ITEMSIZE
    E32(ib, 0); // SQ_ESTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_GSTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_VSTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_PSTMP_RING_ITEMSIZE
    E32(ib, 0); // SQ_FBUF_RING_ITEMSIZE
    E32(ib, 0); // SQ_REDUC_RING_ITEMSIZE
    E32(ib, 0); // SQ_GS_VERT_ITEMSIZE

    // DB
    EREG(ib, DB_DEPTH_CONTROL,                    0);
    PACK0(ib, DB_RENDER_CONTROL, 2);
    E32(ib, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
    if (info->ChipFamily < CHIP_FAMILY_RV770)
        E32(ib, FORCE_SHADER_Z_ORDER_bit);
    else
        E32(ib, 0);
    EREG(ib, DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift) |
                                                   (2 << ALPHA_TO_MASK_OFFSET1_shift) |
                                                   (2 << ALPHA_TO_MASK_OFFSET2_shift) |
                                                   (2 << ALPHA_TO_MASK_OFFSET3_shift)));
    EREG(ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
                                 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */

    PACK0(ib, DB_STENCIL_CLEAR, 2);
    E32(ib, 0); // DB_STENCIL_CLEAR
    E32(ib, 0); // DB_DEPTH_CLEAR

    PACK0(ib, DB_STENCILREFMASK, 3);
    E32(ib, 0); // DB_STENCILREFMASK
    E32(ib, 0); // DB_STENCILREFMASK_BF
    E32(ib, 0); // SX_ALPHA_REF

    PACK0(ib, CB_CLRCMP_CONTROL, 4);
    E32(ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
    E32(ib, 0);                         // CB_CLRCMP_SRC
    E32(ib, 0);                         // CB_CLRCMP_DST
    E32(ib, 0);                         // CB_CLRCMP_MSK

    EREG(ib, CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
    EREG(ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));

    PACK0(ib, SX_ALPHA_TEST_CONTROL, 5);
    E32(ib, 0); // SX_ALPHA_TEST_CONTROL
    E32(ib, 0x00000000); // CB_BLEND_RED
    E32(ib, 0x00000000); // CB_BLEND_GREEN
    E32(ib, 0x00000000); // CB_BLEND_BLUE
    E32(ib, 0x00000000); // CB_BLEND_ALPHA

    EREG(ib, PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
                                                   (0 << WINDOW_Y_OFFSET_shift)));

    if (info->ChipFamily < CHIP_FAMILY_RV770)
        EREG(ib, R7xx_PA_SC_EDGERULE,             0x00000000);
    else
        EREG(ib, R7xx_PA_SC_EDGERULE,             0xAAAAAAAA);

    EREG(ib, PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);

    END_BATCH();

    /* clip boolean is set to always visible -> doesn't matter */
    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
        set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192);

    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
        set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192);

    BEGIN_BATCH(42);
    PACK0(ib, PA_SC_MPASS_PS_CNTL, 2);
    E32(ib, 0);
    if (info->ChipFamily < CHIP_FAMILY_RV770)
        E32(ib, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
    else
        E32(ib, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
                 0x00500000)); /* ? */

    PACK0(ib, PA_SC_LINE_CNTL, 9);
    E32(ib, 0); // PA_SC_LINE_CNTL
    E32(ib, 0); // PA_SC_AA_CONFIG
    E32(ib, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL
             (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
    EFLOAT(ib, 1.0);                                            // PA_CL_GB_VERT_CLIP_ADJ
    EFLOAT(ib, 1.0);                                            // PA_CL_GB_VERT_DISC_ADJ
    EFLOAT(ib, 1.0);                                            // PA_CL_GB_HORZ_CLIP_ADJ
    EFLOAT(ib, 1.0);                                            // PA_CL_GB_HORZ_DISC_ADJ
    E32(ib, 0);                                                 // PA_SC_AA_SAMPLE_LOCS_MCTX
    E32(ib, 0);                                                 // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M

    EREG(ib, PA_SC_AA_MASK,                       0xFFFFFFFF);

    PACK0(ib, PA_CL_CLIP_CNTL, 5);
    E32(ib, CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
    E32(ib, FACE_bit);         // PA_SU_SC_MODE_CNTL
    E32(ib, VTX_XY_FMT_bit);   // PA_CL_VTE_CNTL
    E32(ib, 0);                // PA_CL_VS_OUT_CNTL
    E32(ib, 0);                // PA_CL_NANINF_CNTL

    PACK0(ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
    E32(ib, 0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL
    E32(ib, 0); // PA_SU_POLY_OFFSET_CLAMP
    E32(ib, 0); // PA_SU_POLY_OFFSET_FRONT_SCALE
    E32(ib, 0); // PA_SU_POLY_OFFSET_FRONT_OFFSET
    E32(ib, 0); // PA_SU_POLY_OFFSET_BACK_SCALE
    E32(ib, 0); // PA_SU_POLY_OFFSET_BACK_OFFSET

    // SPI
    if (info->ChipFamily < CHIP_FAMILY_RV770)
        EREG(ib, R7xx_SPI_THREAD_GROUPING,        0);
    else
        EREG(ib, R7xx_SPI_THREAD_GROUPING,        (1 << PS_GROUPING_shift));

    PACK0(ib, SPI_INPUT_Z, 4);
    E32(ib, 0); // SPI_INPUT_Z
    E32(ib, 0); // SPI_FOG_CNTL
    E32(ib, 0); // SPI_FOG_FUNC_SCALE
    E32(ib, 0); // SPI_FOG_FUNC_BIAS

    END_BATCH();

    // clear FS
    fs_conf.bo = accel_state->shaders_bo;
    fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM);

    // VGT
    BEGIN_BATCH(43);
    PACK0(ib, VGT_MAX_VTX_INDX, 4);
    E32(ib, 0xffffff); // VGT_MAX_VTX_INDX
    E32(ib, 0); // VGT_MIN_VTX_INDX
    E32(ib, 0); // VGT_INDX_OFFSET
    E32(ib, 0); // VGT_MULTI_PRIM_IB_RESET_INDX

    EREG(ib, VGT_PRIMITIVEID_EN,                  0);
    EREG(ib, VGT_MULTI_PRIM_IB_RESET_EN,          0);

    PACK0(ib, VGT_INSTANCE_STEP_RATE_0, 2);
    E32(ib, 0); // VGT_INSTANCE_STEP_RATE_0
    E32(ib, 0); // VGT_INSTANCE_STEP_RATE_1

    PACK0(ib, PA_SU_POINT_SIZE, 17);
    E32(ib, 0); // PA_SU_POINT_SIZE
    E32(ib, 0); // PA_SU_POINT_MINMAX
    E32(ib, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
    E32(ib, 0); // PA_SC_LINE_STIPPLE
    E32(ib, 0); // VGT_OUTPUT_PATH_CNTL
    E32(ib, 0); // VGT_HOS_CNTL
    E32(ib, 0); // VGT_HOS_MAX_TESS_LEVEL
    E32(ib, 0); // VGT_HOS_MIN_TESS_LEVEL
    E32(ib, 0); // VGT_HOS_REUSE_DEPTH
    E32(ib, 0); // VGT_GROUP_PRIM_TYPE
    E32(ib, 0); // VGT_GROUP_FIRST_DECR
    E32(ib, 0); // VGT_GROUP_DECR
    E32(ib, 0); // VGT_GROUP_VECT_0_CNTL
    E32(ib, 0); // VGT_GROUP_VECT_1_CNTL
    E32(ib, 0); // VGT_GROUP_VECT_0_FMT_CNTL
    E32(ib, 0); // VGT_GROUP_VECT_1_FMT_CNTL
    E32(ib, 0); // VGT_GS_MODE

    PACK0(ib, VGT_STRMOUT_EN, 3);
    E32(ib, 0); // VGT_STRMOUT_EN
    E32(ib, 0); // VGT_REUSE_OFF
    E32(ib, 0); // VGT_VTX_CNT_EN

    EREG(ib, VGT_STRMOUT_BUFFER_EN,               0);
    END_BATCH();
}


/*
 * Commands
 */

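/* draw_immd() issues a DRAW_INDEX_IMMD packet with the indices embedded in
 * the command stream.  For 16-bit indices two indices are packed per dword,
 * which is why the packet body size is 2 + (num_indices + 1) / 2 dwords;
 * 32-bit indices take one dword each.
 */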
void
draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    uint32_t i, count;

    // calculate the packet body size in dwords
    count = 2;
    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
        count += (draw_conf->num_indices + 1) / 2;
    else
        count += draw_conf->num_indices;

    BEGIN_BATCH(8 + count);
    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
    PACK3(ib, IT_INDEX_TYPE, 1);
    E32(ib, draw_conf->index_type);
    PACK3(ib, IT_NUM_INSTANCES, 1);
    E32(ib, draw_conf->num_instances);

    PACK3(ib, IT_DRAW_INDEX_IMMD, count);
    E32(ib, draw_conf->num_indices);
    E32(ib, draw_conf->vgt_draw_initiator);

    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
        for (i = 0; i < draw_conf->num_indices; i += 2) {
            if ((i + 1) == draw_conf->num_indices)
                E32(ib, indices[i]);
            else
                E32(ib, (indices[i] | (indices[i + 1] << 16)));
        }
    } else {
        for (i = 0; i < draw_conf->num_indices; i++)
            E32(ib, indices[i]);
    }
    END_BATCH();
}

void
draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    BEGIN_BATCH(10);
    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
    PACK3(ib, IT_INDEX_TYPE, 1);
    E32(ib, draw_conf->index_type);
    PACK3(ib, IT_NUM_INSTANCES, 1);
    E32(ib, draw_conf->num_instances);
    PACK3(ib, IT_DRAW_INDEX_AUTO, 2);
    E32(ib, draw_conf->num_indices);
    E32(ib, draw_conf->vgt_draw_initiator);
    END_BATCH();
}

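/* Finish the current accel operation: point a vertex resource at the
 * vertices accumulated in the VBO since vb_start_op, draw them as a
 * RECTLIST with auto-generated indices, wait for the 3D engine to go idle
 * and clean, and sync the destination surface's CB cache.  Without KMS the
 * indirect buffer is flushed to the kernel here as well.
 */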
void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    draw_config_t   draw_conf;
    vtx_resource_t  vtx_res;

    if (accel_state->vb_start_op == -1)
        return;

    CLEAR (draw_conf);
    CLEAR (vtx_res);

    if (accel_state->vb_offset == accel_state->vb_start_op) {
        R600IBDiscard(pScrn, accel_state->ib);
        radeon_vb_discard(pScrn);
        return;
    }

    /* Vertex buffer setup */
    accel_state->vb_size = accel_state->vb_offset - accel_state->vb_start_op;
    vtx_res.id              = SQ_VTX_RESOURCE_vs;
    vtx_res.vtx_size_dw     = vtx_size / 4;
    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
    vtx_res.mem_req_size    = 1;
    vtx_res.vb_addr         = accel_state->vb_mc_addr + accel_state->vb_start_op;
    vtx_res.bo              = accel_state->vb_bo;
    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);

    /* Draw */
    draw_conf.prim_type          = DI_PT_RECTLIST;
    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
    draw_conf.num_instances      = 1;
    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;

    draw_auto(pScrn, accel_state->ib, &draw_conf);

    /* XXX drm should handle this in fence submit */
    wait_3d_idle_clean(pScrn, accel_state->ib);

    /* sync dst surface */
    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
                        accel_state->dst_size, accel_state->dst_obj.offset,
                        accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);

    accel_state->vb_start_op = -1;
    accel_state->ib_reset_op = 0;

#if KMS_MULTI_OP
    if (!info->cs)
#endif
        R600CPFlushIndirect(pScrn, accel_state->ib);
}