r6xx_accel.c revision ad43ddac
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Alex Deucher <alexander.deucher@amd.com>
 *          Matthias Hopf <mhopf@suse.de>
 */
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
30#include "xf86.h"
31
32#include <errno.h>
33
34#include "radeon.h"
35#include "r600_shader.h"
36#include "radeon_reg.h"
37#include "r600_reg.h"
38#include "r600_state.h"
39
40#include "radeon_drm.h"
41#include "radeon_vbo.h"
42
43/* we try and batch operations together under KMS -
44   but it doesn't work yet without misrendering */
45#define KMS_MULTI_OP 1
46
47/* Flush the indirect buffer to the kernel for submission to the card */
48void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib)
49{
50    RADEONInfoPtr  info = RADEONPTR(pScrn);
51    drmBufPtr          buffer = ib;
52    int                start  = 0;
53    drm_radeon_indirect_t  indirect;
54
55#if defined(XF86DRM_MODE)
56    if (info->cs) {
57	radeon_cs_flush_indirect(pScrn);
58	return;
59    }
60#endif
61
62    if (!buffer) return;
63
64    //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
65    //       buffer->idx);
66
67    while (buffer->used & 0x3c){
68	BEGIN_BATCH(1);
69        E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */
70	END_BATCH();
71    }
72
73    //ErrorF("buffer bytes: %d\n", buffer->used);
74
75    indirect.idx     = buffer->idx;
76    indirect.start   = start;
77    indirect.end     = buffer->used;
78    indirect.discard = 1;
79
80    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
81			&indirect, sizeof(drm_radeon_indirect_t));
82
83}
84
85void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
86{
87#if defined(XF86DRM_MODE)
88    int ret;
89    RADEONInfoPtr info = RADEONPTR(pScrn);
90    if (info->cs) {
91	if (info->accel_state->ib_reset_op) {
92	    /* if we have data just reset the CS and ignore the operation */
93	    info->cs->cdw = info->accel_state->ib_reset_op;
94	    info->accel_state->ib_reset_op = 0;
95	    return;
96	}
97	if (info->accel_state->vb_ptr) {
98	    info->accel_state->vb_ptr = NULL;
99	}
100
101	info->accel_state->vb_offset = 0;
102	info->accel_state->vb_start_op = -1;
103
104	if (CS_FULL(info->cs)) {
105	    radeon_cs_flush_indirect(pScrn);
106	    return;
107	}
108	radeon_cs_erase(info->cs);
109	ret = radeon_cs_space_check(info->cs);
110	if (ret)
111	    ErrorF("space check failed in flush\n");
112	if (info->dri2.enabled) {
113		info->accel_state->XInited3D = FALSE;
114		info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN;
115	}
116    }
117#endif
118    if (!ib) return;
119
120    ib->used = 0;
121    R600CPFlushIndirect(pScrn, ib);
122}
123
124void
125wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
126{
127    RADEONInfoPtr info = RADEONPTR(pScrn);
128
129    //flush caches, don't generate timestamp
130    BEGIN_BATCH(5);
131    PACK3(ib, IT_EVENT_WRITE, 1);
132    E32(ib, CACHE_FLUSH_AND_INV_EVENT);
133    // wait for 3D idle clean
134    EREG(ib, WAIT_UNTIL,                          (WAIT_3D_IDLE_bit |
135						   WAIT_3D_IDLECLEAN_bit));
136    END_BATCH();
137}
138
139void
140wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
141{
142    RADEONInfoPtr info = RADEONPTR(pScrn);
143
144    BEGIN_BATCH(3);
145    EREG(ib, WAIT_UNTIL,                          WAIT_3D_IDLE_bit);
146    END_BATCH();
147}
148
149void
150start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
151{
152    RADEONInfoPtr info = RADEONPTR(pScrn);
153
154    if (info->ChipFamily < CHIP_FAMILY_RV770) {
155	BEGIN_BATCH(5);
156	PACK3(ib, IT_START_3D_CMDBUF, 1);
157	E32(ib, 0);
158    } else
159	BEGIN_BATCH(3);
160
161    PACK3(ib, IT_CONTEXT_CONTROL, 2);
162    E32(ib, 0x80000000);
163    E32(ib, 0x80000000);
164    END_BATCH();
165
166}
167
168/*
169 * Setup of functional groups
170 */
171
172// asic stack/thread/gpr limits - need to query the drm
173static void
174sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
175{
176    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
177    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
178    RADEONInfoPtr info = RADEONPTR(pScrn);
179
180    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
181	(info->ChipFamily == CHIP_FAMILY_RV620) ||
182	(info->ChipFamily == CHIP_FAMILY_RS780) ||
183	(info->ChipFamily == CHIP_FAMILY_RS880) ||
184	(info->ChipFamily == CHIP_FAMILY_RV710))
185	sq_config = 0;						// no VC
186    else
187	sq_config = VC_ENABLE_bit;
188
189    sq_config |= (DX9_CONSTS_bit |
190		  ALU_INST_PREFER_VECTOR_bit |
191		  (sq_conf->ps_prio << PS_PRIO_shift) |
192		  (sq_conf->vs_prio << VS_PRIO_shift) |
193		  (sq_conf->gs_prio << GS_PRIO_shift) |
194		  (sq_conf->es_prio << ES_PRIO_shift));
195
196    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
197			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
198			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
199    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
200			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
201
202    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
203			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
204			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
205			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
206
207    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
208				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
209
210    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
211				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
212
213    BEGIN_BATCH(8);
214    PACK0(ib, SQ_CONFIG, 6);
215    E32(ib, sq_config);
216    E32(ib, sq_gpr_resource_mgmt_1);
217    E32(ib, sq_gpr_resource_mgmt_2);
218    E32(ib, sq_thread_resource_mgmt);
219    E32(ib, sq_stack_resource_mgmt_1);
220    E32(ib, sq_stack_resource_mgmt_2);
221    END_BATCH();
222}
223
224void
225set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain)
226{
227    uint32_t cb_color_info;
228    int pitch, slice, h;
229    RADEONInfoPtr info = RADEONPTR(pScrn);
230
231    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
232		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
233		     (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
234		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
235		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
236		     (cb_conf->tile_mode   << CB_COLOR0_INFO__TILE_MODE_shift));
237    if (cb_conf->read_size)
238	cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
239    if (cb_conf->blend_clamp)
240	cb_color_info |= BLEND_CLAMP_bit;
241    if (cb_conf->clear_color)
242	cb_color_info |= CLEAR_COLOR_bit;
243    if (cb_conf->blend_bypass)
244	cb_color_info |= BLEND_BYPASS_bit;
245    if (cb_conf->blend_float32)
246	cb_color_info |= BLEND_FLOAT32_bit;
247    if (cb_conf->simple_float)
248	cb_color_info |= SIMPLE_FLOAT_bit;
249    if (cb_conf->round_mode)
250	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
251    if (cb_conf->tile_compact)
252	cb_color_info |= TILE_COMPACT_bit;
253    if (cb_conf->source_format)
254	cb_color_info |= SOURCE_FORMAT_bit;
255
256    pitch = (cb_conf->w / 8) - 1;
257    h = RADEON_ALIGN(cb_conf->h, 8);
258    slice = ((cb_conf->w * h) / 64) - 1;
259
260    BEGIN_BATCH(3 + 2);
261    EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
262    RELOC_BATCH(cb_conf->bo, 0, domain);
263    END_BATCH();
264
265    // rv6xx workaround
266    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
267        (info->ChipFamily < CHIP_FAMILY_RV770)) {
268        BEGIN_BATCH(2);
269        PACK3(ib, IT_SURFACE_BASE_UPDATE, 1);
270        E32(ib, (2 << cb_conf->id));
271        END_BATCH();
272    }
273    /* Set CMASK & TILE buffer to the offset of color buffer as
274     * we don't use those this shouldn't cause any issue and we
275     * then have a valid cmd stream
276     */
277    BEGIN_BATCH(3 + 2);
278    EREG(ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0     >> 8));	// CMASK per-tile data base/256
279    RELOC_BATCH(cb_conf->bo, 0, domain);
280    END_BATCH();
281    BEGIN_BATCH(3 + 2);
282    EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0     >> 8));	// FMASK per-tile data base/256
283    RELOC_BATCH(cb_conf->bo, 0, domain);
284    END_BATCH();
285    BEGIN_BATCH(12);
286    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
287    EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift)	|
288						    (slice << SLICE_TILE_MAX_shift)));
289    EREG(ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0    << SLICE_START_shift)		|
290						    (0    << SLICE_MAX_shift)));
291    EREG(ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
292    EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0    << CMASK_BLOCK_MAX_shift)	|
293						    (0    << FMASK_TILE_MAX_shift)));
294    END_BATCH();
295}
296
297void
298cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr,
299		    struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
300{
301    RADEONInfoPtr info = RADEONPTR(pScrn);
302    uint32_t cp_coher_size;
303    if (size == 0xffffffff)
304	cp_coher_size = 0xffffffff;
305    else
306	cp_coher_size = ((size + 255) >> 8);
307
308    BEGIN_BATCH(5 + 2);
309    PACK3(ib, IT_SURFACE_SYNC, 4);
310    E32(ib, sync_type);
311    E32(ib, cp_coher_size);
312    E32(ib, (mc_addr >> 8));
313    E32(ib, 10); /* poll interval */
314    RELOC_BATCH(bo, rdomains, wdomain);
315    END_BATCH();
316}
317
318/* inserts a wait for vline in the command stream */
319void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
320			xf86CrtcPtr crtc, int start, int stop)
321{
322    RADEONInfoPtr  info = RADEONPTR(pScrn);
323    uint32_t offset;
324
325    if (!crtc)
326        return;
327
328    if (stop < start)
329        return;
330
331    if (!crtc->enabled)
332        return;
333
334    if (info->cs) {
335        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
336	    return;
337    } else {
338#ifdef USE_EXA
339	if (info->useEXA)
340	    offset = exaGetPixmapOffset(pPix);
341	else
342#endif
343	    offset = pPix->devPrivate.ptr - info->FB;
344
345	/* if drawing to front buffer */
346	if (offset != 0)
347	    return;
348    }
349
350    start = max(start, 0);
351    stop = min(stop, crtc->mode.VDisplay);
352
353    if (start > crtc->mode.VDisplay)
354        return;
355
356#if defined(XF86DRM_MODE)
357    if (info->cs) {
358	drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
359
360	BEGIN_BATCH(11);
361	/* set the VLINE range */
362	EREG(ib, AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */
363	     (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
364	     (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
365
366	/* tell the CP to poll the VLINE state register */
367	PACK3(ib, IT_WAIT_REG_MEM, 6);
368	E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
369	E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS));
370	E32(ib, 0);
371	E32(ib, 0);                          // Ref value
372	E32(ib, AVIVO_D1MODE_VLINE_STAT);    // Mask
373	E32(ib, 10);                         // Wait interval
374	/* add crtc reloc */
375	PACK3(ib, IT_NOP, 1);
376	E32(ib, drmmode_crtc->mode_crtc->crtc_id);
377	END_BATCH();
378    } else
379#endif
380    {
381	RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
382
383	BEGIN_BATCH(9);
384	/* set the VLINE range */
385	EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset,
386	     (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
387	     (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
388
389	/* tell the CP to poll the VLINE state register */
390	PACK3(ib, IT_WAIT_REG_MEM, 6);
391	E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
392	E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset));
393	E32(ib, 0);
394	E32(ib, 0);                          // Ref value
395	E32(ib, AVIVO_D1MODE_VLINE_STAT);    // Mask
396	E32(ib, 10);                         // Wait interval
397	END_BATCH();
398    }
399}
400
401void
402fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain)
403{
404    RADEONInfoPtr info = RADEONPTR(pScrn);
405    uint32_t sq_pgm_resources;
406
407    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
408			(fs_conf->stack_size << STACK_SIZE_shift));
409
410    if (fs_conf->dx10_clamp)
411	sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;
412
413    BEGIN_BATCH(3 + 2);
414    EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
415    RELOC_BATCH(fs_conf->bo, domain, 0);
416    END_BATCH();
417
418    BEGIN_BATCH(6);
419    EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources);
420    EREG(ib, SQ_PGM_CF_OFFSET_FS, 0);
421    END_BATCH();
422}
423
424void
425vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain)
426{
427    RADEONInfoPtr info = RADEONPTR(pScrn);
428    uint32_t sq_pgm_resources;
429
430    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
431			(vs_conf->stack_size << STACK_SIZE_shift));
432
433    if (vs_conf->dx10_clamp)
434	sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
435    if (vs_conf->fetch_cache_lines)
436	sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
437    if (vs_conf->uncached_first_inst)
438	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
439
440    BEGIN_BATCH(3 + 2);
441    EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
442    RELOC_BATCH(vs_conf->bo, domain, 0);
443    END_BATCH();
444
445    BEGIN_BATCH(6);
446    EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources);
447    EREG(ib, SQ_PGM_CF_OFFSET_VS, 0);
448    END_BATCH();
449}
450
451void
452ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain)
453{
454    RADEONInfoPtr info = RADEONPTR(pScrn);
455    uint32_t sq_pgm_resources;
456
457    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
458			(ps_conf->stack_size << STACK_SIZE_shift));
459
460    if (ps_conf->dx10_clamp)
461	sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
462    if (ps_conf->fetch_cache_lines)
463	sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
464    if (ps_conf->uncached_first_inst)
465	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
466    if (ps_conf->clamp_consts)
467	sq_pgm_resources |= CLAMP_CONSTS_bit;
468
469    BEGIN_BATCH(3 + 2);
470    EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
471    RELOC_BATCH(ps_conf->bo, domain, 0);
472    END_BATCH();
473
474    BEGIN_BATCH(9);
475    EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources);
476    EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
477    EREG(ib, SQ_PGM_CF_OFFSET_PS, 0);
478    END_BATCH();
479}
480
481void
482set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
483{
484    RADEONInfoPtr info = RADEONPTR(pScrn);
485    int i;
486    const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2);
487
488    BEGIN_BATCH(2 + countreg);
489    PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg);
490    for (i = 0; i < countreg; i++)
491	EFLOAT(ib, const_buf[i]);
492    END_BATCH();
493}
494
495void
496set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
497{
498    RADEONInfoPtr info = RADEONPTR(pScrn);
499    /* bool register order is: ps, vs, gs; one register each
500     * 1 bits per bool; 32 bools each for ps, vs, gs.
501     */
502    BEGIN_BATCH(3);
503    EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
504    END_BATCH();
505}
506
507void
508set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain)
509{
510    RADEONInfoPtr info = RADEONPTR(pScrn);
511    uint32_t sq_vtx_constant_word2;
512
513    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
514			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
515			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
516			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
517			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
518    if (res->clamp_x)
519	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
520
521    if (res->format_comp_all)
522	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
523
524    if (res->srf_mode_all)
525	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
526
527    BEGIN_BATCH(9 + 2);
528    PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
529    E32(ib, res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
530    E32(ib, (res->vtx_num_entries << 2) - 1);			// 1: SIZE
531    E32(ib, sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
532    E32(ib, res->mem_req_size << MEM_REQUEST_SIZE_shift);		// 3: MEM_REQUEST_SIZE ?!?
533    E32(ib, 0);							// 4: n/a
534    E32(ib, 0);							// 5: n/a
535    E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift);	// 6: TYPE
536    RELOC_BATCH(res->bo, domain, 0);
537    END_BATCH();
538}
539
540void
541set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain)
542{
543    RADEONInfoPtr info = RADEONPTR(pScrn);
544    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
545    uint32_t sq_tex_resource_word5, sq_tex_resource_word6;
546
547    sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
548			     (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));
549
550    if (tex_res->w)
551	sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
552				  ((tex_res->w - 1) << TEX_WIDTH_shift));
553
554    if (tex_res->tile_type)
555	sq_tex_resource_word0 |= TILE_TYPE_bit;
556
557    sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);
558
559    if (tex_res->h)
560	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
561    if (tex_res->depth)
562	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
563
564    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
565			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
566			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
567			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
568			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
569			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
570			     (tex_res->request_size << REQUEST_SIZE_shift) |
571			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
572			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
573			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
574			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
575			     (tex_res->base_level << BASE_LEVEL_shift));
576
577    if (tex_res->srf_mode_all)
578	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
579    if (tex_res->force_degamma)
580	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
581
582    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
583			     (tex_res->base_array << BASE_ARRAY_shift) |
584			     (tex_res->last_array << LAST_ARRAY_shift));
585
586    sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
587			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
588			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));
589
590    if (tex_res->interlaced)
591	sq_tex_resource_word6 |= INTERLACED_bit;
592
593    BEGIN_BATCH(9 + 4);
594    PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
595    E32(ib, sq_tex_resource_word0);
596    E32(ib, sq_tex_resource_word1);
597    E32(ib, ((tex_res->base) >> 8));
598    E32(ib, ((tex_res->mip_base) >> 8));
599    E32(ib, sq_tex_resource_word4);
600    E32(ib, sq_tex_resource_word5);
601    E32(ib, sq_tex_resource_word6);
602    RELOC_BATCH(tex_res->bo, domain, 0);
603    RELOC_BATCH(tex_res->mip_bo, domain, 0);
604    END_BATCH();
605}
606
607void
608set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
609{
610    RADEONInfoPtr info = RADEONPTR(pScrn);
611    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
612
613    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
614			    (s->clamp_y       << CLAMP_Y_shift)					|
615			    (s->clamp_z       << CLAMP_Z_shift)					|
616			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
617			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
618			    (s->z_filter      << Z_FILTER_shift)	|
619			    (s->mip_filter    << MIP_FILTER_shift)				|
620			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
621			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
622			    (s->chroma_key    << CHROMA_KEY_shift));
623    if (s->point_sampling_clamp)
624	sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
625    if (s->tex_array_override)
626	sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
627    if (s->lod_uses_minor_axis)
628	sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;
629
630    sq_tex_sampler_word1 = ((s->min_lod       << MIN_LOD_shift)					|
631			    (s->max_lod       << MAX_LOD_shift)					|
632			    (s->lod_bias      << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));
633
634    sq_tex_sampler_word2 = ((s->lod_bias2     << LOD_BIAS_SEC_shift)	|
635			    (s->perf_mip      << PERF_MIP_shift)	|
636			    (s->perf_z        << PERF_Z_shift));
637    if (s->mc_coord_truncate)
638	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
639    if (s->force_degamma)
640	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
641    if (s->high_precision_filter)
642	sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
643    if (s->fetch_4)
644	sq_tex_sampler_word2 |= FETCH_4_bit;
645    if (s->sample_is_pcf)
646	sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
647    if (s->type)
648	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
649
650    BEGIN_BATCH(5);
651    PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
652    E32(ib, sq_tex_sampler_word0);
653    E32(ib, sq_tex_sampler_word1);
654    E32(ib, sq_tex_sampler_word2);
655    END_BATCH();
656}
657
658//XXX deal with clip offsets in clip setup
659void
660set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
661{
662    RADEONInfoPtr info = RADEONPTR(pScrn);
663
664    BEGIN_BATCH(6);
665    EREG(ib, PA_SC_SCREEN_SCISSOR_TL,              ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
666						    (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
667    EREG(ib, PA_SC_SCREEN_SCISSOR_BR,              ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
668						    (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
669    END_BATCH();
670}
671
672void
673set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
674{
675    RADEONInfoPtr info = RADEONPTR(pScrn);
676
677    BEGIN_BATCH(6);
678    EREG(ib, PA_SC_VPORT_SCISSOR_0_TL +
679	 id * PA_SC_VPORT_SCISSOR_0_TL_offset, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
680						(y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
681						WINDOW_OFFSET_DISABLE_bit));
682    EREG(ib, PA_SC_VPORT_SCISSOR_0_BR +
683	 id * PA_SC_VPORT_SCISSOR_0_BR_offset, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
684						(y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
685    END_BATCH();
686}
687
688void
689set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
690{
691    RADEONInfoPtr info = RADEONPTR(pScrn);
692
693    BEGIN_BATCH(6);
694    EREG(ib, PA_SC_GENERIC_SCISSOR_TL,            ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
695						   (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
696						   WINDOW_OFFSET_DISABLE_bit));
697    EREG(ib, PA_SC_GENERIC_SCISSOR_BR,            ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
698						   (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
699    END_BATCH();
700}
701
702void
703set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
704{
705    RADEONInfoPtr info = RADEONPTR(pScrn);
706
707    BEGIN_BATCH(6);
708    EREG(ib, PA_SC_WINDOW_SCISSOR_TL,             ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
709						   (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
710						   WINDOW_OFFSET_DISABLE_bit));
711    EREG(ib, PA_SC_WINDOW_SCISSOR_BR,             ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
712						   (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
713    END_BATCH();
714}
715
716void
717set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
718{
719    RADEONInfoPtr info = RADEONPTR(pScrn);
720
721    BEGIN_BATCH(6);
722    EREG(ib, PA_SC_CLIPRECT_0_TL +
723	 id * PA_SC_CLIPRECT_0_TL_offset,     ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
724					       (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
725    EREG(ib, PA_SC_CLIPRECT_0_BR +
726	 id * PA_SC_CLIPRECT_0_BR_offset,     ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
727					       (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
728    END_BATCH();
729}
730
731/*
732 * Setup of default state
733 */
734
735void
736set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
737{
738    tex_resource_t tex_res;
739    shader_config_t fs_conf;
740    sq_config_t sq_conf;
741    int i;
742    RADEONInfoPtr info = RADEONPTR(pScrn);
743    struct radeon_accel_state *accel_state = info->accel_state;
744
745    if (accel_state->XInited3D)
746	return;
747
748    memset(&tex_res, 0, sizeof(tex_resource_t));
749    memset(&fs_conf, 0, sizeof(shader_config_t));
750
751    accel_state->XInited3D = TRUE;
752
753    start_3d(pScrn, accel_state->ib);
754
755    // ASIC specific setup, see drm
756    BEGIN_BATCH(15);
757    if (info->ChipFamily < CHIP_FAMILY_RV770) {
758	EREG(ib, TA_CNTL_AUX,                     (( 3 << GRADIENT_CREDIT_shift)		|
759						   (28 << TD_FIFO_CREDIT_shift)));
760	EREG(ib, VC_ENHANCE,                      0);
761	EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
762	EREG(ib, DB_DEBUG,                        0x82000000); /* ? */
763	EREG(ib, DB_WATERMARKS,		        ((4  << DEPTH_FREE_shift)		|
764						 (16 << DEPTH_FLUSH_shift)		|
765						 (0  << FORCE_SUMMARIZE_shift)		|
766						 (4  << DEPTH_PENDING_FREE_shift)	|
767						 (16 << DEPTH_CACHELINE_FREE_shift)	|
768						 0));
769    } else {
770	EREG(ib, TA_CNTL_AUX,                      (( 2 << GRADIENT_CREDIT_shift)		|
771						    (28 << TD_FIFO_CREDIT_shift)));
772	EREG(ib, VC_ENHANCE,                       0);
773	EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
774	EREG(ib, DB_DEBUG,                         0);
775	EREG(ib, DB_WATERMARKS,                    ((4  << DEPTH_FREE_shift)		|
776						    (16 << DEPTH_FLUSH_shift)		|
777						    (0  << FORCE_SUMMARIZE_shift)		|
778						    (4  << DEPTH_PENDING_FREE_shift)	|
779						    (4  << DEPTH_CACHELINE_FREE_shift)	|
780						    0));
781    }
782    END_BATCH();
783
784    // SQ
785    sq_conf.ps_prio = 0;
786    sq_conf.vs_prio = 1;
787    sq_conf.gs_prio = 2;
788    sq_conf.es_prio = 3;
789    // need to set stack/thread/gpr limits based on the asic
790    // for now just set them low enough so any card will work
791    // see r600_cp.c in the drm
792    switch (info->ChipFamily) {
793    case CHIP_FAMILY_R600:
794	sq_conf.num_ps_gprs = 192;
795	sq_conf.num_vs_gprs = 56;
796	sq_conf.num_temp_gprs = 4;
797	sq_conf.num_gs_gprs = 0;
798	sq_conf.num_es_gprs = 0;
799	sq_conf.num_ps_threads = 136;
800	sq_conf.num_vs_threads = 48;
801	sq_conf.num_gs_threads = 4;
802	sq_conf.num_es_threads = 4;
803	sq_conf.num_ps_stack_entries = 128;
804	sq_conf.num_vs_stack_entries = 128;
805	sq_conf.num_gs_stack_entries = 0;
806	sq_conf.num_es_stack_entries = 0;
807	break;
808    case CHIP_FAMILY_RV630:
809    case CHIP_FAMILY_RV635:
810	sq_conf.num_ps_gprs = 84;
811	sq_conf.num_vs_gprs = 36;
812	sq_conf.num_temp_gprs = 4;
813	sq_conf.num_gs_gprs = 0;
814	sq_conf.num_es_gprs = 0;
815	sq_conf.num_ps_threads = 144;
816	sq_conf.num_vs_threads = 40;
817	sq_conf.num_gs_threads = 4;
818	sq_conf.num_es_threads = 4;
819	sq_conf.num_ps_stack_entries = 40;
820	sq_conf.num_vs_stack_entries = 40;
821	sq_conf.num_gs_stack_entries = 32;
822	sq_conf.num_es_stack_entries = 16;
823	break;
824    case CHIP_FAMILY_RV610:
825    case CHIP_FAMILY_RV620:
826    case CHIP_FAMILY_RS780:
827    case CHIP_FAMILY_RS880:
828    default:
829	sq_conf.num_ps_gprs = 84;
830	sq_conf.num_vs_gprs = 36;
831	sq_conf.num_temp_gprs = 4;
832	sq_conf.num_gs_gprs = 0;
833	sq_conf.num_es_gprs = 0;
834	sq_conf.num_ps_threads = 136;
835	sq_conf.num_vs_threads = 48;
836	sq_conf.num_gs_threads = 4;
837	sq_conf.num_es_threads = 4;
838	sq_conf.num_ps_stack_entries = 40;
839	sq_conf.num_vs_stack_entries = 40;
840	sq_conf.num_gs_stack_entries = 32;
841	sq_conf.num_es_stack_entries = 16;
842	break;
843    case CHIP_FAMILY_RV670:
844	sq_conf.num_ps_gprs = 144;
845	sq_conf.num_vs_gprs = 40;
846	sq_conf.num_temp_gprs = 4;
847	sq_conf.num_gs_gprs = 0;
848	sq_conf.num_es_gprs = 0;
849	sq_conf.num_ps_threads = 136;
850	sq_conf.num_vs_threads = 48;
851	sq_conf.num_gs_threads = 4;
852	sq_conf.num_es_threads = 4;
853	sq_conf.num_ps_stack_entries = 40;
854	sq_conf.num_vs_stack_entries = 40;
855	sq_conf.num_gs_stack_entries = 32;
856	sq_conf.num_es_stack_entries = 16;
857	break;
858    case CHIP_FAMILY_RV770:
859	sq_conf.num_ps_gprs = 192;
860	sq_conf.num_vs_gprs = 56;
861	sq_conf.num_temp_gprs = 4;
862	sq_conf.num_gs_gprs = 0;
863	sq_conf.num_es_gprs = 0;
864	sq_conf.num_ps_threads = 188;
865	sq_conf.num_vs_threads = 60;
866	sq_conf.num_gs_threads = 0;
867	sq_conf.num_es_threads = 0;
868	sq_conf.num_ps_stack_entries = 256;
869	sq_conf.num_vs_stack_entries = 256;
870	sq_conf.num_gs_stack_entries = 0;
871	sq_conf.num_es_stack_entries = 0;
872	break;
873    case CHIP_FAMILY_RV730:
874    case CHIP_FAMILY_RV740:
875	sq_conf.num_ps_gprs = 84;
876	sq_conf.num_vs_gprs = 36;
877	sq_conf.num_temp_gprs = 4;
878	sq_conf.num_gs_gprs = 0;
879	sq_conf.num_es_gprs = 0;
880	sq_conf.num_ps_threads = 188;
881	sq_conf.num_vs_threads = 60;
882	sq_conf.num_gs_threads = 0;
883	sq_conf.num_es_threads = 0;
884	sq_conf.num_ps_stack_entries = 128;
885	sq_conf.num_vs_stack_entries = 128;
886	sq_conf.num_gs_stack_entries = 0;
887	sq_conf.num_es_stack_entries = 0;
888	break;
889    case CHIP_FAMILY_RV710:
890	sq_conf.num_ps_gprs = 192;
891	sq_conf.num_vs_gprs = 56;
892	sq_conf.num_temp_gprs = 4;
893	sq_conf.num_gs_gprs = 0;
894	sq_conf.num_es_gprs = 0;
895	sq_conf.num_ps_threads = 144;
896	sq_conf.num_vs_threads = 48;
897	sq_conf.num_gs_threads = 0;
898	sq_conf.num_es_threads = 0;
899	sq_conf.num_ps_stack_entries = 128;
900	sq_conf.num_vs_stack_entries = 128;
901	sq_conf.num_gs_stack_entries = 0;
902	sq_conf.num_es_stack_entries = 0;
903	break;
904    }
905
906    sq_setup(pScrn, ib, &sq_conf);
907
908    BEGIN_BATCH(59);
909    EREG(ib, SQ_VTX_BASE_VTX_LOC,                 0);
910    EREG(ib, SQ_VTX_START_INST_LOC,               0);
911
912    PACK0(ib, SQ_ESGS_RING_ITEMSIZE, 9);
913    E32(ib, 0);							// SQ_ESGS_RING_ITEMSIZE
914    E32(ib, 0);							// SQ_GSVS_RING_ITEMSIZE
915    E32(ib, 0);							// SQ_ESTMP_RING_ITEMSIZE
916    E32(ib, 0);							// SQ_GSTMP_RING_ITEMSIZE
917    E32(ib, 0);							// SQ_VSTMP_RING_ITEMSIZE
918    E32(ib, 0);							// SQ_PSTMP_RING_ITEMSIZE
919    E32(ib, 0);							// SQ_FBUF_RING_ITEMSIZE
920    E32(ib, 0);							// SQ_REDUC_RING_ITEMSIZE
921    E32(ib, 0);							// SQ_GS_VERT_ITEMSIZE
922
923    // DB
924    EREG(ib, DB_DEPTH_INFO,                       0);
925    EREG(ib, DB_STENCIL_CLEAR,                    0);
926    EREG(ib, DB_DEPTH_CLEAR,                      0);
927    EREG(ib, DB_STENCILREFMASK,                   0);
928    EREG(ib, DB_STENCILREFMASK_BF,                0);
929    EREG(ib, DB_DEPTH_CONTROL,                    0);
930    EREG(ib, DB_RENDER_CONTROL,                   STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
931    if (info->ChipFamily < CHIP_FAMILY_RV770)
932	EREG(ib, DB_RENDER_OVERRIDE,              FORCE_SHADER_Z_ORDER_bit);
933    else
934	EREG(ib, DB_RENDER_OVERRIDE,              0);
935    EREG(ib, DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
936						   (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
937						   (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
938						   (2 << ALPHA_TO_MASK_OFFSET3_shift)));
939
940
941    EREG(ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
942				 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
943
944
945    // SX
946    EREG(ib, SX_ALPHA_TEST_CONTROL,               0);
947    EREG(ib, SX_ALPHA_REF,                        0);
948
949    // CB
950    PACK0(ib, CB_BLEND_RED, 4);
951    E32(ib, 0x00000000);
952    E32(ib, 0x00000000);
953    E32(ib, 0x00000000);
954    E32(ib, 0x00000000);
955    END_BATCH();
956
957    if (info->ChipFamily < CHIP_FAMILY_RV770) {
958	BEGIN_BATCH(11);
959	PACK0(ib, CB_FOG_RED, 3);
960	E32(ib, 0x00000000);
961	E32(ib, 0x00000000);
962	E32(ib, 0x00000000);
963	PACK0(ib, CB_CLEAR_RED, 4);
964	EFLOAT(ib, 1.0);						/* WTF? */
965	EFLOAT(ib, 0.0);
966	EFLOAT(ib, 1.0);
967	EFLOAT(ib, 1.0);
968	END_BATCH();
969    }
970
971    BEGIN_BATCH(18);
972    PACK0(ib, CB_CLRCMP_CONTROL, 4);
973    E32(ib, 1 << CLRCMP_FCN_SEL_shift);				// CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
974    E32(ib, 0);							// CB_CLRCMP_SRC
975    E32(ib, 0);							// CB_CLRCMP_DST
976    E32(ib, 0);							// CB_CLRCMP_MSK
977
978    EREG(ib, CB_SHADER_MASK,                      (0xf << OUTPUT0_ENABLE_shift));
979    EREG(ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
980
981
982    // SC
983    EREG(ib, PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
984						   (0 << WINDOW_Y_OFFSET_shift)));
985
986    EREG(ib, PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
987    END_BATCH();
988
989    /* clip boolean is set to always visible -> doesn't matter */
990    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
991	set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192);
992
993    BEGIN_BATCH(3);
994    if (info->ChipFamily < CHIP_FAMILY_RV770)
995	EREG(ib, R7xx_PA_SC_EDGERULE,             0x00000000);
996    else
997	EREG(ib, R7xx_PA_SC_EDGERULE,             0xAAAAAAAA);
998    END_BATCH();
999
1000    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) {
1001	set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192);
1002	BEGIN_BATCH(4);
1003	PACK0(ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2);
1004	EFLOAT(ib, 0.0);
1005	EFLOAT(ib, 1.0);
1006	END_BATCH();
1007    }
1008
1009    BEGIN_BATCH(15);
1010    if (info->ChipFamily < CHIP_FAMILY_RV770)
1011	EREG(ib, PA_SC_MODE_CNTL,                 (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
1012    else
1013	EREG(ib, PA_SC_MODE_CNTL,                 (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
1014						   0x00500000)); /* ? */
1015
1016    EREG(ib, PA_SU_SC_MODE_CNTL, (FACE_bit |
1017				  (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
1018				  (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
1019
1020
1021    EREG(ib, PA_SC_LINE_CNTL,                     0);
1022    EREG(ib, PA_SC_AA_CONFIG,                     0);
1023    EREG(ib, PA_SC_AA_MASK,                       0xFFFFFFFF);
1024    END_BATCH();
1025
1026    //XXX: double check this
1027    if (info->ChipFamily > CHIP_FAMILY_R600) {
1028	BEGIN_BATCH(6);
1029	EREG(ib, PA_SC_AA_SAMPLE_LOCS_MCTX,       0);
1030	EREG(ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M,   0);
1031	END_BATCH();
1032    }
1033
1034    BEGIN_BATCH(83);
1035    EREG(ib, PA_SC_LINE_STIPPLE,                  0);
1036    EREG(ib, PA_SC_MPASS_PS_CNTL,                 0);
1037
1038    // CL
1039    PACK0(ib, PA_CL_VPORT_XSCALE_0, 6);
1040    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_XSCALE
1041    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_XOFFSET
1042    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_YSCALE
1043    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_YOFFSET
1044    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_ZSCALE
1045    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_ZOFFSET
1046    EREG(ib, PA_CL_VTE_CNTL,                      0);
1047    EREG(ib, PA_CL_VS_OUT_CNTL,                   0);
1048    EREG(ib, PA_CL_NANINF_CNTL,                   0);
1049    PACK0(ib, PA_CL_GB_VERT_CLIP_ADJ, 4);
1050    EFLOAT(ib, 1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1051    EFLOAT(ib, 1.0);						// PA_CL_GB_VERT_DISC_ADJ
1052    EFLOAT(ib, 1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1053    EFLOAT(ib, 1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1054
1055    /* Scissor / viewport */
1056    EREG(ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
1057    EREG(ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);
1058
1059    // SU
1060    EREG(ib, PA_SU_SC_MODE_CNTL,                  FACE_bit);
1061    EREG(ib, PA_SU_POINT_SIZE,                    0);
1062    EREG(ib, PA_SU_POINT_MINMAX,                  0);
1063    EREG(ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL,       0);
1064    EREG(ib, PA_SU_POLY_OFFSET_BACK_SCALE,        0);
1065    EREG(ib, PA_SU_POLY_OFFSET_FRONT_SCALE,       0);
1066    EREG(ib, PA_SU_POLY_OFFSET_BACK_OFFSET,       0);
1067    EREG(ib, PA_SU_POLY_OFFSET_FRONT_OFFSET,      0);
1068
1069    EREG(ib, PA_SU_LINE_CNTL,                     (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */
1070    EREG(ib, PA_SU_VTX_CNTL,                      ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit |
1071						   (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
1072    EREG(ib, PA_SU_POLY_OFFSET_CLAMP,             0);
1073
1074    // SPI
1075    if (info->ChipFamily < CHIP_FAMILY_RV770)
1076	EREG(ib, R7xx_SPI_THREAD_GROUPING,        0);
1077    else
1078	EREG(ib, R7xx_SPI_THREAD_GROUPING,        (1 << PS_GROUPING_shift));
1079
1080    EREG(ib, SPI_INPUT_Z,                         0);
1081    EREG(ib, SPI_FOG_CNTL,                        0);
1082    EREG(ib, SPI_FOG_FUNC_SCALE,                  0);
1083    EREG(ib, SPI_FOG_FUNC_BIAS,                   0);
1084    END_BATCH();
1085
1086    // clear FS
1087    fs_conf.bo = accel_state->shaders_bo;
1088    fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1089
1090    // VGT
1091    BEGIN_BATCH(75);
1092    EREG(ib, VGT_MAX_VTX_INDX,                    2048); /* XXX set to a reasonably large number of indices */
1093    EREG(ib, VGT_MIN_VTX_INDX,                    0);
1094    EREG(ib, VGT_INDX_OFFSET,                     0);
1095    EREG(ib, VGT_INSTANCE_STEP_RATE_0,            0);
1096    EREG(ib, VGT_INSTANCE_STEP_RATE_1,            0);
1097    EREG(ib, VGT_MULTI_PRIM_IB_RESET_INDX,        0);
1098    EREG(ib, VGT_OUTPUT_PATH_CNTL,                0);
1099    EREG(ib, VGT_GS_MODE,                         0);
1100    EREG(ib, VGT_HOS_CNTL,                        0);
1101    EREG(ib, VGT_HOS_MAX_TESS_LEVEL,              0);
1102    EREG(ib, VGT_HOS_MIN_TESS_LEVEL,              0);
1103    EREG(ib, VGT_HOS_REUSE_DEPTH,                 0);
1104    EREG(ib, VGT_GROUP_PRIM_TYPE,                 0);
1105    EREG(ib, VGT_GROUP_FIRST_DECR,                0);
1106    EREG(ib, VGT_GROUP_DECR,                      0);
1107    EREG(ib, VGT_GROUP_VECT_0_CNTL,               0);
1108    EREG(ib, VGT_GROUP_VECT_1_CNTL,               0);
1109    EREG(ib, VGT_GROUP_VECT_0_FMT_CNTL,           0);
1110    EREG(ib, VGT_GROUP_VECT_1_FMT_CNTL,           0);
1111    EREG(ib, VGT_PRIMITIVEID_EN,                  0);
1112    EREG(ib, VGT_MULTI_PRIM_IB_RESET_EN,          0);
1113    EREG(ib, VGT_STRMOUT_EN,                      0);
1114    EREG(ib, VGT_REUSE_OFF,                       0);
1115    EREG(ib, VGT_VTX_CNT_EN,                      0);
1116    EREG(ib, VGT_STRMOUT_BUFFER_EN,               0);
1117    END_BATCH();
1118}
1119
1120
1121/*
1122 * Commands
1123 */
1124
1125void
1126draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
1127{
1128    RADEONInfoPtr info = RADEONPTR(pScrn);
1129    uint32_t i, count;
1130
1131    // calculate num of packets
1132    count = 2;
1133    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
1134	count += (draw_conf->num_indices + 1) / 2;
1135    else
1136	count += draw_conf->num_indices;
1137
1138    BEGIN_BATCH(8 + count);
1139    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1140    PACK3(ib, IT_INDEX_TYPE, 1);
1141    E32(ib, draw_conf->index_type);
1142    PACK3(ib, IT_NUM_INSTANCES, 1);
1143    E32(ib, draw_conf->num_instances);
1144
1145    PACK3(ib, IT_DRAW_INDEX_IMMD, count);
1146    E32(ib, draw_conf->num_indices);
1147    E32(ib, draw_conf->vgt_draw_initiator);
1148
1149    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
1150	for (i = 0; i < draw_conf->num_indices; i += 2) {
1151	    if ((i + 1) == draw_conf->num_indices)
1152		E32(ib, indices[i]);
1153	    else
1154		E32(ib, (indices[i] | (indices[i + 1] << 16)));
1155	}
1156    } else {
1157	for (i = 0; i < draw_conf->num_indices; i++)
1158	    E32(ib, indices[i]);
1159    }
1160    END_BATCH();
1161}
1162
1163void
1164draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
1165{
1166    RADEONInfoPtr info = RADEONPTR(pScrn);
1167
1168    BEGIN_BATCH(10);
1169    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1170    PACK3(ib, IT_INDEX_TYPE, 1);
1171    E32(ib, draw_conf->index_type);
1172    PACK3(ib, IT_NUM_INSTANCES, 1);
1173    E32(ib, draw_conf->num_instances);
1174    PACK3(ib, IT_DRAW_INDEX_AUTO, 2);
1175    E32(ib, draw_conf->num_indices);
1176    E32(ib, draw_conf->vgt_draw_initiator);
1177    END_BATCH();
1178}
1179
1180Bool
1181r600_vb_get(ScrnInfoPtr pScrn)
1182{
1183    RADEONInfoPtr info = RADEONPTR(pScrn);
1184    struct radeon_accel_state *accel_state = info->accel_state;
1185
1186    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
1187	(accel_state->ib->idx*accel_state->ib->total)+
1188	(accel_state->ib->total / 2);
1189    accel_state->vb_total = (accel_state->ib->total / 2);
1190    accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address +
1191				    (accel_state->ib->total / 2));
1192    accel_state->vb_offset = 0;
1193    return TRUE;
1194}
1195
1196void
1197r600_vb_discard(ScrnInfoPtr pScrn)
1198{
1199    RADEONInfoPtr info = RADEONPTR(pScrn);
1200
1201    info->accel_state->vb_start_op = -1;
1202}
1203
1204
1205
1206int
1207r600_cp_start(ScrnInfoPtr pScrn)
1208{
1209    RADEONInfoPtr info = RADEONPTR(pScrn);
1210    struct radeon_accel_state *accel_state = info->accel_state;
1211
1212#if defined(XF86DRM_MODE)
1213    if (info->cs) {
1214	if (CS_FULL(info->cs)) {
1215	    radeon_cs_flush_indirect(pScrn);
1216	}
1217	accel_state->ib_reset_op = info->cs->cdw;
1218	accel_state->vb_start_op = accel_state->vb_offset;
1219    } else
1220#endif
1221    {
1222	accel_state->ib = RADEONCPGetBuffer(pScrn);
1223	if (!r600_vb_get(pScrn)) {
1224	    return -1;
1225	}
1226	accel_state->vb_start_op = accel_state->vb_offset;
1227    }
1228    return 0;
1229}
1230
1231void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1232{
1233    RADEONInfoPtr info = RADEONPTR(pScrn);
1234    struct radeon_accel_state *accel_state = info->accel_state;
1235    draw_config_t   draw_conf;
1236    vtx_resource_t  vtx_res;
1237
1238    if (accel_state->vb_start_op == -1)
1239	return;
1240
1241    CLEAR (draw_conf);
1242    CLEAR (vtx_res);
1243
1244    if (accel_state->vb_offset == accel_state->vb_start_op) {
1245        R600IBDiscard(pScrn, accel_state->ib);
1246	r600_vb_discard(pScrn);
1247	return;
1248    }
1249
1250    /* flush vertex cache */
1251    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
1252	(info->ChipFamily == CHIP_FAMILY_RV620) ||
1253	(info->ChipFamily == CHIP_FAMILY_RS780) ||
1254	(info->ChipFamily == CHIP_FAMILY_RS880) ||
1255	(info->ChipFamily == CHIP_FAMILY_RV710))
1256	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
1257			    accel_state->vb_offset, accel_state->vb_mc_addr,
1258			    accel_state->vb_bo,
1259			    RADEON_GEM_DOMAIN_GTT, 0);
1260    else
1261	cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
1262			    accel_state->vb_offset, accel_state->vb_mc_addr,
1263			    accel_state->vb_bo,
1264			    RADEON_GEM_DOMAIN_GTT, 0);
1265
1266    /* Vertex buffer setup */
1267    accel_state->vb_size = accel_state->vb_offset - accel_state->vb_start_op;
1268    vtx_res.id              = SQ_VTX_RESOURCE_vs;
1269    vtx_res.vtx_size_dw     = vtx_size / 4;
1270    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
1271    vtx_res.mem_req_size    = 1;
1272    vtx_res.vb_addr         = accel_state->vb_mc_addr + accel_state->vb_start_op;
1273    vtx_res.bo              = accel_state->vb_bo;
1274    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1275
1276    /* Draw */
1277    draw_conf.prim_type          = DI_PT_RECTLIST;
1278    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1279    draw_conf.num_instances      = 1;
1280    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1281    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1282
1283    draw_auto(pScrn, accel_state->ib, &draw_conf);
1284
1285    /* XXX drm should handle this in fence submit */
1286    wait_3d_idle_clean(pScrn, accel_state->ib);
1287
1288    /* sync dst surface */
1289    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1290			accel_state->dst_size, accel_state->dst_obj.offset,
1291			accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1292
1293    accel_state->vb_start_op = -1;
1294    accel_state->ib_reset_op = 0;
1295
1296#if KMS_MULTI_OP
1297    if (!info->cs)
1298#endif
1299	R600CPFlushIndirect(pScrn, accel_state->ib);
1300}
1301
1302void r600_vb_no_space(ScrnInfoPtr pScrn, int vert_size)
1303{
1304#ifdef XF86DRM_MODE
1305    RADEONInfoPtr info = RADEONPTR(pScrn);
1306    struct radeon_accel_state *accel_state = info->accel_state;
1307
1308    if (info->cs) {
1309	if (accel_state->vb_bo) {
1310	    if (accel_state->vb_start_op != accel_state->vb_offset) {
1311		r600_finish_op(pScrn, vert_size);
1312		accel_state->ib_reset_op = info->cs->cdw;
1313	    }
1314
1315	    /* release the current VBO */
1316	    radeon_vbo_put(pScrn);
1317	}
1318
1319	/* get a new one */
1320	radeon_vbo_get(pScrn);
1321	return;
1322    }
1323#endif
1324
1325    r600_finish_op(pScrn, vert_size);
1326    r600_cp_start(pScrn);
1327}
1328