r6xx_accel.c revision 2f39173d
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Alex Deucher <alexander.deucher@amd.com>
24 *          Matthias Hopf <mhopf@suse.de>
25 */
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
30#include "xf86.h"
31
32#include <errno.h>
33
34#include "radeon.h"
35#include "r600_shader.h"
36#include "radeon_reg.h"
37#include "r600_reg.h"
38#include "r600_state.h"
39
40#include "radeon_drm.h"
41#include "radeon_vbo.h"
42
43/* we try and batch operations together under KMS -
44   but it doesn't work yet without misrendering */
45#define KMS_MULTI_OP 1
46
47/* Flush the indirect buffer to the kernel for submission to the card */
48void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib)
49{
50    RADEONInfoPtr  info = RADEONPTR(pScrn);
51    drmBufPtr          buffer = ib;
52    int                start  = 0;
53    drm_radeon_indirect_t  indirect;
54
55#if defined(XF86DRM_MODE)
56    if (info->cs) {
57	radeon_cs_flush_indirect(pScrn);
58	return;
59    }
60#endif
61
62    if (!buffer) return;
63
64    //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
65    //       buffer->idx);
66
67    while (buffer->used & 0x3c){
68	BEGIN_BATCH(1);
69        E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */
70	END_BATCH();
71    }
72
73    //ErrorF("buffer bytes: %d\n", buffer->used);
74
75    indirect.idx     = buffer->idx;
76    indirect.start   = start;
77    indirect.end     = buffer->used;
78    indirect.discard = 1;
79
80    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
81			&indirect, sizeof(drm_radeon_indirect_t));
82
83}
84
85void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
86{
87#if defined(XF86DRM_MODE)
88    int ret;
89    RADEONInfoPtr info = RADEONPTR(pScrn);
90    if (info->cs) {
91	if (info->accel_state->ib_reset_op) {
92	    /* if we have data just reset the CS and ignore the operation */
93	    info->cs->cdw = info->accel_state->ib_reset_op;
94	    info->accel_state->ib_reset_op = 0;
95	    return;
96	}
97	if (info->accel_state->vb_ptr) {
98	    info->accel_state->vb_ptr = NULL;
99	}
100
101	info->accel_state->vb_offset = 0;
102	info->accel_state->vb_start_op = -1;
103
104	if (CS_FULL(info->cs)) {
105	    radeon_cs_flush_indirect(pScrn);
106	    return;
107	}
108	radeon_cs_erase(info->cs);
109	ret = radeon_cs_space_check(info->cs);
110	if (ret)
111	    ErrorF("space check failed in flush\n");
112	if (info->dri2.enabled) {
113		info->accel_state->XInited3D = FALSE;
114		info->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN;
115	}
116    }
117#endif
118    if (!ib) return;
119
120    ib->used = 0;
121    R600CPFlushIndirect(pScrn, ib);
122}
123
124void
125wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
126{
127    RADEONInfoPtr info = RADEONPTR(pScrn);
128
129    //flush caches, don't generate timestamp
130    BEGIN_BATCH(5);
131    PACK3(ib, IT_EVENT_WRITE, 1);
132    E32(ib, CACHE_FLUSH_AND_INV_EVENT);
133    // wait for 3D idle clean
134    EREG(ib, WAIT_UNTIL,                          (WAIT_3D_IDLE_bit |
135						   WAIT_3D_IDLECLEAN_bit));
136    END_BATCH();
137}
138
139void
140wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
141{
142    RADEONInfoPtr info = RADEONPTR(pScrn);
143
144    BEGIN_BATCH(3);
145    EREG(ib, WAIT_UNTIL,                          WAIT_3D_IDLE_bit);
146    END_BATCH();
147}
148
149void
150start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
151{
152    RADEONInfoPtr info = RADEONPTR(pScrn);
153
154    if (info->ChipFamily < CHIP_FAMILY_RV770) {
155	BEGIN_BATCH(5);
156	PACK3(ib, IT_START_3D_CMDBUF, 1);
157	E32(ib, 0);
158    } else
159	BEGIN_BATCH(3);
160
161    PACK3(ib, IT_CONTEXT_CONTROL, 2);
162    E32(ib, 0x80000000);
163    E32(ib, 0x80000000);
164    END_BATCH();
165
166}
167
168/*
169 * Setup of functional groups
170 */
171
172// asic stack/thread/gpr limits - need to query the drm
173static void
174sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
175{
176    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
177    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
178    RADEONInfoPtr info = RADEONPTR(pScrn);
179
180    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
181	(info->ChipFamily == CHIP_FAMILY_RV620) ||
182	(info->ChipFamily == CHIP_FAMILY_RS780) ||
183	(info->ChipFamily == CHIP_FAMILY_RS880) ||
184	(info->ChipFamily == CHIP_FAMILY_RV710))
185	sq_config = 0;						// no VC
186    else
187	sq_config = VC_ENABLE_bit;
188
189    sq_config |= (DX9_CONSTS_bit |
190		  ALU_INST_PREFER_VECTOR_bit |
191		  (sq_conf->ps_prio << PS_PRIO_shift) |
192		  (sq_conf->vs_prio << VS_PRIO_shift) |
193		  (sq_conf->gs_prio << GS_PRIO_shift) |
194		  (sq_conf->es_prio << ES_PRIO_shift));
195
196    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
197			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
198			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
199    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
200			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
201
202    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
203			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
204			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
205			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
206
207    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
208				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
209
210    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
211				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
212
213    BEGIN_BATCH(8);
214    PACK0(ib, SQ_CONFIG, 6);
215    E32(ib, sq_config);
216    E32(ib, sq_gpr_resource_mgmt_1);
217    E32(ib, sq_gpr_resource_mgmt_2);
218    E32(ib, sq_thread_resource_mgmt);
219    E32(ib, sq_stack_resource_mgmt_1);
220    E32(ib, sq_stack_resource_mgmt_2);
221    END_BATCH();
222}
223
224void
225set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain)
226{
227    uint32_t cb_color_info;
228    int pitch, slice, h;
229    RADEONInfoPtr info = RADEONPTR(pScrn);
230
231    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
232		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
233		     (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
234		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
235		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
236		     (cb_conf->tile_mode   << CB_COLOR0_INFO__TILE_MODE_shift));
237    if (cb_conf->read_size)
238	cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
239    if (cb_conf->blend_clamp)
240	cb_color_info |= BLEND_CLAMP_bit;
241    if (cb_conf->clear_color)
242	cb_color_info |= CLEAR_COLOR_bit;
243    if (cb_conf->blend_bypass)
244	cb_color_info |= BLEND_BYPASS_bit;
245    if (cb_conf->blend_float32)
246	cb_color_info |= BLEND_FLOAT32_bit;
247    if (cb_conf->simple_float)
248	cb_color_info |= SIMPLE_FLOAT_bit;
249    if (cb_conf->round_mode)
250	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
251    if (cb_conf->tile_compact)
252	cb_color_info |= TILE_COMPACT_bit;
253    if (cb_conf->source_format)
254	cb_color_info |= SOURCE_FORMAT_bit;
255
256    pitch = (cb_conf->w / 8) - 1;
257    h = RADEON_ALIGN(cb_conf->h, 8);
258    slice = ((cb_conf->w * h) / 64) - 1;
259
260    BEGIN_BATCH(3 + 2);
261    EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
262    RELOC_BATCH(cb_conf->bo, 0, domain);
263    END_BATCH();
264
265    // rv6xx workaround
266    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
267        (info->ChipFamily < CHIP_FAMILY_RV770)) {
268        BEGIN_BATCH(2);
269        PACK3(ib, IT_SURFACE_BASE_UPDATE, 1);
270        E32(ib, (2 << cb_conf->id));
271        END_BATCH();
272    }
273    /* Set CMASK & TILE buffer to the offset of color buffer as
274     * we don't use those this shouldn't cause any issue and we
275     * then have a valid cmd stream
276     */
277    BEGIN_BATCH(3 + 2);
278    EREG(ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0     >> 8));	// CMASK per-tile data base/256
279    RELOC_BATCH(cb_conf->bo, 0, domain);
280    END_BATCH();
281    BEGIN_BATCH(3 + 2);
282    EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0     >> 8));	// FMASK per-tile data base/256
283    RELOC_BATCH(cb_conf->bo, 0, domain);
284    END_BATCH();
285    BEGIN_BATCH(12);
286    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
287    EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift)	|
288						    (slice << SLICE_TILE_MAX_shift)));
289    EREG(ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0    << SLICE_START_shift)		|
290						    (0    << SLICE_MAX_shift)));
291    EREG(ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
292    EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0    << CMASK_BLOCK_MAX_shift)	|
293						    (0    << FMASK_TILE_MAX_shift)));
294    END_BATCH();
295}
296
297void
298cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr,
299		    struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
300{
301    RADEONInfoPtr info = RADEONPTR(pScrn);
302    uint32_t cp_coher_size;
303    if (size == 0xffffffff)
304	cp_coher_size = 0xffffffff;
305    else
306	cp_coher_size = ((size + 255) >> 8);
307
308    BEGIN_BATCH(5 + 2);
309    PACK3(ib, IT_SURFACE_SYNC, 4);
310    E32(ib, sync_type);
311    E32(ib, cp_coher_size);
312    E32(ib, (mc_addr >> 8));
313    E32(ib, 10); /* poll interval */
314    RELOC_BATCH(bo, rdomains, wdomain);
315    END_BATCH();
316}
317
318/* inserts a wait for vline in the command stream */
319void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
320			xf86CrtcPtr crtc, int start, int stop)
321{
322    RADEONInfoPtr  info = RADEONPTR(pScrn);
323    uint32_t offset;
324
325    if (!crtc)
326        return;
327
328    if (stop < start)
329        return;
330
331    if (!crtc->enabled)
332        return;
333
334    if (info->cs) {
335        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
336	    return;
337    } else {
338#ifdef USE_EXA
339	if (info->useEXA)
340	    offset = exaGetPixmapOffset(pPix);
341	else
342#endif
343	    offset = pPix->devPrivate.ptr - info->FB;
344
345	/* if drawing to front buffer */
346	if (offset != 0)
347	    return;
348    }
349
350    start = max(start, 0);
351    stop = min(stop, crtc->mode.VDisplay);
352
353    if (start > crtc->mode.VDisplay)
354        return;
355
356#if defined(XF86DRM_MODE)
357    if (info->cs) {
358	drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
359
360	BEGIN_BATCH(11);
361	/* set the VLINE range */
362	EREG(ib, AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */
363	     (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
364	     (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
365
366	/* tell the CP to poll the VLINE state register */
367	PACK3(ib, IT_WAIT_REG_MEM, 6);
368	E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
369	E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS));
370	E32(ib, 0);
371	E32(ib, 0);                          // Ref value
372	E32(ib, AVIVO_D1MODE_VLINE_STAT);    // Mask
373	E32(ib, 10);                         // Wait interval
374	/* add crtc reloc */
375	PACK3(ib, IT_NOP, 1);
376	E32(ib, drmmode_crtc->mode_crtc->crtc_id);
377	END_BATCH();
378    } else
379#endif
380    {
381	RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
382
383	BEGIN_BATCH(9);
384	/* set the VLINE range */
385	EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset,
386	     (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
387	     (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
388
389	/* tell the CP to poll the VLINE state register */
390	PACK3(ib, IT_WAIT_REG_MEM, 6);
391	E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
392	E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset));
393	E32(ib, 0);
394	E32(ib, 0);                          // Ref value
395	E32(ib, AVIVO_D1MODE_VLINE_STAT);    // Mask
396	E32(ib, 10);                         // Wait interval
397	END_BATCH();
398    }
399}
400
401void
402fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain)
403{
404    RADEONInfoPtr info = RADEONPTR(pScrn);
405    uint32_t sq_pgm_resources;
406
407    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
408			(fs_conf->stack_size << STACK_SIZE_shift));
409
410    if (fs_conf->dx10_clamp)
411	sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;
412
413    BEGIN_BATCH(3 + 2);
414    EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
415    RELOC_BATCH(fs_conf->bo, domain, 0);
416    END_BATCH();
417
418    BEGIN_BATCH(6);
419    EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources);
420    EREG(ib, SQ_PGM_CF_OFFSET_FS, 0);
421    END_BATCH();
422}
423
424void
425vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain)
426{
427    RADEONInfoPtr info = RADEONPTR(pScrn);
428    uint32_t sq_pgm_resources;
429
430    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
431			(vs_conf->stack_size << STACK_SIZE_shift));
432
433    if (vs_conf->dx10_clamp)
434	sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
435    if (vs_conf->fetch_cache_lines)
436	sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
437    if (vs_conf->uncached_first_inst)
438	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
439
440    BEGIN_BATCH(3 + 2);
441    EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
442    RELOC_BATCH(vs_conf->bo, domain, 0);
443    END_BATCH();
444
445    BEGIN_BATCH(6);
446    EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources);
447    EREG(ib, SQ_PGM_CF_OFFSET_VS, 0);
448    END_BATCH();
449}
450
451void
452ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain)
453{
454    RADEONInfoPtr info = RADEONPTR(pScrn);
455    uint32_t sq_pgm_resources;
456
457    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
458			(ps_conf->stack_size << STACK_SIZE_shift));
459
460    if (ps_conf->dx10_clamp)
461	sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
462    if (ps_conf->fetch_cache_lines)
463	sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
464    if (ps_conf->uncached_first_inst)
465	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
466    if (ps_conf->clamp_consts)
467	sq_pgm_resources |= CLAMP_CONSTS_bit;
468
469    BEGIN_BATCH(3 + 2);
470    EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
471    RELOC_BATCH(ps_conf->bo, domain, 0);
472    END_BATCH();
473
474    BEGIN_BATCH(9);
475    EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources);
476    EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
477    EREG(ib, SQ_PGM_CF_OFFSET_PS, 0);
478    END_BATCH();
479}
480
481void
482set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
483{
484    RADEONInfoPtr info = RADEONPTR(pScrn);
485    int i;
486    const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2);
487
488    BEGIN_BATCH(2 + countreg);
489    PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg);
490    for (i = 0; i < countreg; i++)
491	EFLOAT(ib, const_buf[i]);
492    END_BATCH();
493}
494
495void
496set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
497{
498    RADEONInfoPtr info = RADEONPTR(pScrn);
499    /* bool register order is: ps, vs, gs; one register each
500     * 1 bits per bool; 32 bools each for ps, vs, gs.
501     */
502    BEGIN_BATCH(3);
503    EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
504    END_BATCH();
505}
506
507void
508set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain)
509{
510    RADEONInfoPtr info = RADEONPTR(pScrn);
511    uint32_t sq_vtx_constant_word2;
512
513    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
514			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
515			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
516			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
517			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
518    if (res->clamp_x)
519	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
520
521    if (res->format_comp_all)
522	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
523
524    if (res->srf_mode_all)
525	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
526
527    BEGIN_BATCH(9 + 2);
528    PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
529    E32(ib, res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
530    E32(ib, (res->vtx_num_entries << 2) - 1);			// 1: SIZE
531    E32(ib, sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
532    E32(ib, res->mem_req_size << MEM_REQUEST_SIZE_shift);		// 3: MEM_REQUEST_SIZE ?!?
533    E32(ib, 0);							// 4: n/a
534    E32(ib, 0);							// 5: n/a
535    E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift);	// 6: TYPE
536    RELOC_BATCH(res->bo, domain, 0);
537    END_BATCH();
538}
539
540void
541set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain)
542{
543    RADEONInfoPtr info = RADEONPTR(pScrn);
544    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
545    uint32_t sq_tex_resource_word5, sq_tex_resource_word6;
546
547    sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
548			     (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));
549
550    if (tex_res->w)
551	sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
552				  ((tex_res->w - 1) << TEX_WIDTH_shift));
553
554    if (tex_res->tile_type)
555	sq_tex_resource_word0 |= TILE_TYPE_bit;
556
557    sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);
558
559    if (tex_res->h)
560	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
561    if (tex_res->depth)
562	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
563
564    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
565			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
566			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
567			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
568			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
569			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
570			     (tex_res->request_size << REQUEST_SIZE_shift) |
571			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
572			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
573			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
574			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
575			     (tex_res->base_level << BASE_LEVEL_shift));
576
577    if (tex_res->srf_mode_all)
578	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
579    if (tex_res->force_degamma)
580	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
581
582    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
583			     (tex_res->base_array << BASE_ARRAY_shift) |
584			     (tex_res->last_array << LAST_ARRAY_shift));
585
586    sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
587			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
588			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));
589
590    if (tex_res->interlaced)
591	sq_tex_resource_word6 |= INTERLACED_bit;
592
593    BEGIN_BATCH(9 + 4);
594    PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
595    E32(ib, sq_tex_resource_word0);
596    E32(ib, sq_tex_resource_word1);
597    E32(ib, ((tex_res->base) >> 8));
598    E32(ib, ((tex_res->mip_base) >> 8));
599    E32(ib, sq_tex_resource_word4);
600    E32(ib, sq_tex_resource_word5);
601    E32(ib, sq_tex_resource_word6);
602    RELOC_BATCH(tex_res->bo, domain, 0);
603    RELOC_BATCH(tex_res->mip_bo, domain, 0);
604    END_BATCH();
605}
606
607void
608set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
609{
610    RADEONInfoPtr info = RADEONPTR(pScrn);
611    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
612
613    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
614			    (s->clamp_y       << CLAMP_Y_shift)					|
615			    (s->clamp_z       << CLAMP_Z_shift)					|
616			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
617			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
618			    (s->z_filter      << Z_FILTER_shift)	|
619			    (s->mip_filter    << MIP_FILTER_shift)				|
620			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
621			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
622			    (s->chroma_key    << CHROMA_KEY_shift));
623    if (s->point_sampling_clamp)
624	sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
625    if (s->tex_array_override)
626	sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
627    if (s->lod_uses_minor_axis)
628	sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;
629
630    sq_tex_sampler_word1 = ((s->min_lod       << MIN_LOD_shift)					|
631			    (s->max_lod       << MAX_LOD_shift)					|
632			    (s->lod_bias      << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));
633
634    sq_tex_sampler_word2 = ((s->lod_bias2     << LOD_BIAS_SEC_shift)	|
635			    (s->perf_mip      << PERF_MIP_shift)	|
636			    (s->perf_z        << PERF_Z_shift));
637    if (s->mc_coord_truncate)
638	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
639    if (s->force_degamma)
640	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
641    if (s->high_precision_filter)
642	sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
643    if (s->fetch_4)
644	sq_tex_sampler_word2 |= FETCH_4_bit;
645    if (s->sample_is_pcf)
646	sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
647    if (s->type)
648	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
649
650    BEGIN_BATCH(5);
651    PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
652    E32(ib, sq_tex_sampler_word0);
653    E32(ib, sq_tex_sampler_word1);
654    E32(ib, sq_tex_sampler_word2);
655    END_BATCH();
656}
657
658//XXX deal with clip offsets in clip setup
659void
660set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
661{
662    RADEONInfoPtr info = RADEONPTR(pScrn);
663
664    BEGIN_BATCH(4);
665    PACK0(ib, PA_SC_SCREEN_SCISSOR_TL, 2);
666    E32(ib, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
667	     (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
668    E32(ib, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
669	     (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
670    END_BATCH();
671}
672
673void
674set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
675{
676    RADEONInfoPtr info = RADEONPTR(pScrn);
677
678    BEGIN_BATCH(4);
679    PACK0(ib, PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
680    E32(ib, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
681	     (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
682	     WINDOW_OFFSET_DISABLE_bit));
683    E32(ib, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
684	     (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
685    END_BATCH();
686}
687
688void
689set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
690{
691    RADEONInfoPtr info = RADEONPTR(pScrn);
692
693    BEGIN_BATCH(4);
694    PACK0(ib, PA_SC_GENERIC_SCISSOR_TL, 2);
695    E32(ib, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
696	     (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
697	     WINDOW_OFFSET_DISABLE_bit));
698    E32(ib, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
699	     (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
700    END_BATCH();
701}
702
703void
704set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
705{
706    RADEONInfoPtr info = RADEONPTR(pScrn);
707
708    BEGIN_BATCH(4);
709    PACK0(ib, PA_SC_WINDOW_SCISSOR_TL, 2);
710    E32(ib, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
711	     (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
712	     WINDOW_OFFSET_DISABLE_bit));
713    E32(ib, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
714	      (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
715    END_BATCH();
716}
717
718void
719set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
720{
721    RADEONInfoPtr info = RADEONPTR(pScrn);
722
723    BEGIN_BATCH(4);
724    PACK0(ib, PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
725    E32(ib, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
726	     (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
727    E32(ib, ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
728	     (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
729    END_BATCH();
730}
731
732/*
733 * Setup of default state
734 */
735
736void
737set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
738{
739    tex_resource_t tex_res;
740    shader_config_t fs_conf;
741    sq_config_t sq_conf;
742    int i;
743    RADEONInfoPtr info = RADEONPTR(pScrn);
744    struct radeon_accel_state *accel_state = info->accel_state;
745
746    if (accel_state->XInited3D)
747	return;
748
749    memset(&tex_res, 0, sizeof(tex_resource_t));
750    memset(&fs_conf, 0, sizeof(shader_config_t));
751
752    accel_state->XInited3D = TRUE;
753
754    start_3d(pScrn, accel_state->ib);
755
756    // SQ
757    sq_conf.ps_prio = 0;
758    sq_conf.vs_prio = 1;
759    sq_conf.gs_prio = 2;
760    sq_conf.es_prio = 3;
761    // need to set stack/thread/gpr limits based on the asic
762    // for now just set them low enough so any card will work
763    // see r600_cp.c in the drm
764    switch (info->ChipFamily) {
765    case CHIP_FAMILY_R600:
766	sq_conf.num_ps_gprs = 192;
767	sq_conf.num_vs_gprs = 56;
768	sq_conf.num_temp_gprs = 4;
769	sq_conf.num_gs_gprs = 0;
770	sq_conf.num_es_gprs = 0;
771	sq_conf.num_ps_threads = 136;
772	sq_conf.num_vs_threads = 48;
773	sq_conf.num_gs_threads = 4;
774	sq_conf.num_es_threads = 4;
775	sq_conf.num_ps_stack_entries = 128;
776	sq_conf.num_vs_stack_entries = 128;
777	sq_conf.num_gs_stack_entries = 0;
778	sq_conf.num_es_stack_entries = 0;
779	break;
780    case CHIP_FAMILY_RV630:
781    case CHIP_FAMILY_RV635:
782	sq_conf.num_ps_gprs = 84;
783	sq_conf.num_vs_gprs = 36;
784	sq_conf.num_temp_gprs = 4;
785	sq_conf.num_gs_gprs = 0;
786	sq_conf.num_es_gprs = 0;
787	sq_conf.num_ps_threads = 144;
788	sq_conf.num_vs_threads = 40;
789	sq_conf.num_gs_threads = 4;
790	sq_conf.num_es_threads = 4;
791	sq_conf.num_ps_stack_entries = 40;
792	sq_conf.num_vs_stack_entries = 40;
793	sq_conf.num_gs_stack_entries = 32;
794	sq_conf.num_es_stack_entries = 16;
795	break;
796    case CHIP_FAMILY_RV610:
797    case CHIP_FAMILY_RV620:
798    case CHIP_FAMILY_RS780:
799    case CHIP_FAMILY_RS880:
800    default:
801	sq_conf.num_ps_gprs = 84;
802	sq_conf.num_vs_gprs = 36;
803	sq_conf.num_temp_gprs = 4;
804	sq_conf.num_gs_gprs = 0;
805	sq_conf.num_es_gprs = 0;
806	sq_conf.num_ps_threads = 136;
807	sq_conf.num_vs_threads = 48;
808	sq_conf.num_gs_threads = 4;
809	sq_conf.num_es_threads = 4;
810	sq_conf.num_ps_stack_entries = 40;
811	sq_conf.num_vs_stack_entries = 40;
812	sq_conf.num_gs_stack_entries = 32;
813	sq_conf.num_es_stack_entries = 16;
814	break;
815    case CHIP_FAMILY_RV670:
816	sq_conf.num_ps_gprs = 144;
817	sq_conf.num_vs_gprs = 40;
818	sq_conf.num_temp_gprs = 4;
819	sq_conf.num_gs_gprs = 0;
820	sq_conf.num_es_gprs = 0;
821	sq_conf.num_ps_threads = 136;
822	sq_conf.num_vs_threads = 48;
823	sq_conf.num_gs_threads = 4;
824	sq_conf.num_es_threads = 4;
825	sq_conf.num_ps_stack_entries = 40;
826	sq_conf.num_vs_stack_entries = 40;
827	sq_conf.num_gs_stack_entries = 32;
828	sq_conf.num_es_stack_entries = 16;
829	break;
830    case CHIP_FAMILY_RV770:
831	sq_conf.num_ps_gprs = 192;
832	sq_conf.num_vs_gprs = 56;
833	sq_conf.num_temp_gprs = 4;
834	sq_conf.num_gs_gprs = 0;
835	sq_conf.num_es_gprs = 0;
836	sq_conf.num_ps_threads = 188;
837	sq_conf.num_vs_threads = 60;
838	sq_conf.num_gs_threads = 0;
839	sq_conf.num_es_threads = 0;
840	sq_conf.num_ps_stack_entries = 256;
841	sq_conf.num_vs_stack_entries = 256;
842	sq_conf.num_gs_stack_entries = 0;
843	sq_conf.num_es_stack_entries = 0;
844	break;
845    case CHIP_FAMILY_RV730:
846    case CHIP_FAMILY_RV740:
847	sq_conf.num_ps_gprs = 84;
848	sq_conf.num_vs_gprs = 36;
849	sq_conf.num_temp_gprs = 4;
850	sq_conf.num_gs_gprs = 0;
851	sq_conf.num_es_gprs = 0;
852	sq_conf.num_ps_threads = 188;
853	sq_conf.num_vs_threads = 60;
854	sq_conf.num_gs_threads = 0;
855	sq_conf.num_es_threads = 0;
856	sq_conf.num_ps_stack_entries = 128;
857	sq_conf.num_vs_stack_entries = 128;
858	sq_conf.num_gs_stack_entries = 0;
859	sq_conf.num_es_stack_entries = 0;
860	break;
861    case CHIP_FAMILY_RV710:
862	sq_conf.num_ps_gprs = 192;
863	sq_conf.num_vs_gprs = 56;
864	sq_conf.num_temp_gprs = 4;
865	sq_conf.num_gs_gprs = 0;
866	sq_conf.num_es_gprs = 0;
867	sq_conf.num_ps_threads = 144;
868	sq_conf.num_vs_threads = 48;
869	sq_conf.num_gs_threads = 0;
870	sq_conf.num_es_threads = 0;
871	sq_conf.num_ps_stack_entries = 128;
872	sq_conf.num_vs_stack_entries = 128;
873	sq_conf.num_gs_stack_entries = 0;
874	sq_conf.num_es_stack_entries = 0;
875	break;
876    }
877
878    sq_setup(pScrn, ib, &sq_conf);
879
880    BEGIN_BATCH(83);
881    if (info->ChipFamily < CHIP_FAMILY_RV770) {
882	EREG(ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) |
883				 - (28 << TD_FIFO_CREDIT_shift)));
884	EREG(ib, VC_ENHANCE, 0);
885	EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
886	EREG(ib, DB_DEBUG, 0x82000000); /* ? */
887	EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
888				 (16 << DEPTH_FLUSH_shift) |
889				 (0 << FORCE_SUMMARIZE_shift) |
890				 (4 << DEPTH_PENDING_FREE_shift) |
891				 (16 << DEPTH_CACHELINE_FREE_shift) |
892				 0));
893    } else {
894	EREG(ib, TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) |
895			       - (28 << TD_FIFO_CREDIT_shift)));
896	EREG(ib, VC_ENHANCE, 0);
897	EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
898	EREG(ib, DB_DEBUG, 0);
899	EREG(ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
900				 (16 << DEPTH_FLUSH_shift) |
901				 (0 << FORCE_SUMMARIZE_shift) |
902				 (4 << DEPTH_PENDING_FREE_shift) |
903				 (4 << DEPTH_CACHELINE_FREE_shift) |
904				 0));
905    }
906
907    PACK0(ib, SQ_VTX_BASE_VTX_LOC, 2);
908    E32(ib, 0);
909    E32(ib, 0);
910
911    PACK0(ib, SQ_ESGS_RING_ITEMSIZE, 9);
912    E32(ib, 0); // SQ_ESGS_RING_ITEMSIZE
913    E32(ib, 0); // SQ_GSVS_RING_ITEMSIZE
914    E32(ib, 0); // SQ_ESTMP_RING_ITEMSIZE
915    E32(ib, 0); // SQ_GSTMP_RING_ITEMSIZE
916    E32(ib, 0); // SQ_VSTMP_RING_ITEMSIZE
917    E32(ib, 0); // SQ_PSTMP_RING_ITEMSIZE
918    E32(ib, 0); // SQ_FBUF_RING_ITEMSIZE
919    E32(ib, 0); // SQ_REDUC_RING_ITEMSIZE
920    E32(ib, 0); // SQ_GS_VERT_ITEMSIZE
921
922    // DB
923    EREG(ib, DB_DEPTH_INFO,                       0);
924    EREG(ib, DB_DEPTH_CONTROL,                    0);
925    PACK0(ib, DB_RENDER_CONTROL, 2);
926    E32(ib, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
927    if (info->ChipFamily < CHIP_FAMILY_RV770)
928	E32(ib, FORCE_SHADER_Z_ORDER_bit);
929    else
930	E32(ib, 0);
931    EREG(ib, DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
932						   (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
933						   (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
934						   (2 << ALPHA_TO_MASK_OFFSET3_shift)));
935    EREG(ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
936				 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
937
938    PACK0(ib, DB_STENCIL_CLEAR, 2);
939    E32(ib, 0); // DB_STENCIL_CLEAR
940    E32(ib, 0); // DB_DEPTH_CLEAR
941
942    PACK0(ib, DB_STENCILREFMASK, 3);
943    E32(ib, 0); // DB_STENCILREFMASK
944    E32(ib, 0); // DB_STENCILREFMASK_BF
945    E32(ib, 0); // SX_ALPHA_REF
946
947    PACK0(ib, CB_CLRCMP_CONTROL, 4);
948    E32(ib, 1 << CLRCMP_FCN_SEL_shift);				// CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
949    E32(ib, 0);							// CB_CLRCMP_SRC
950    E32(ib, 0);							// CB_CLRCMP_DST
951    E32(ib, 0);							// CB_CLRCMP_MSK
952
953    EREG(ib, CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
954    EREG(ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
955
956    PACK0(ib, SX_ALPHA_TEST_CONTROL, 5);
957    E32(ib, 0); // SX_ALPHA_TEST_CONTROL
958    E32(ib, 0x00000000); // CB_BLEND_RED
959    E32(ib, 0x00000000); // CB_BLEND_GREEN
960    E32(ib, 0x00000000); // CB_BLEND_BLUE
961    E32(ib, 0x00000000); // CB_BLEND_ALPHA
962
963    EREG(ib, PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
964						   (0 << WINDOW_Y_OFFSET_shift)));
965
966    if (info->ChipFamily < CHIP_FAMILY_RV770)
967	EREG(ib, R7xx_PA_SC_EDGERULE,             0x00000000);
968    else
969	EREG(ib, R7xx_PA_SC_EDGERULE,             0xAAAAAAAA);
970
971    EREG(ib, PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
972
973    END_BATCH();
974
975    /* clip boolean is set to always visible -> doesn't matter */
976    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
977	set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192);
978
979    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
980	set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192);
981
982    BEGIN_BATCH(42);
983    PACK0(ib, PA_SC_MPASS_PS_CNTL, 2);
984    E32(ib, 0);
985    if (info->ChipFamily < CHIP_FAMILY_RV770)
986	E32(ib, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
987    else
988	E32(ib, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
989		 0x00500000)); /* ? */
990
991    PACK0(ib, PA_SC_LINE_CNTL, 9);
992    E32(ib, 0); // PA_SC_LINE_CNTL
993    E32(ib, 0); // PA_SC_AA_CONFIG
994    E32(ib, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL
995	     (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
996    EFLOAT(ib, 1.0);						// PA_CL_GB_VERT_CLIP_ADJ
997    EFLOAT(ib, 1.0);						// PA_CL_GB_VERT_DISC_ADJ
998    EFLOAT(ib, 1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
999    EFLOAT(ib, 1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1000    E32(ib, 0);                                                 // PA_SC_AA_SAMPLE_LOCS_MCTX
1001    E32(ib, 0);                                                 // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M
1002
1003    EREG(ib, PA_SC_AA_MASK,                       0xFFFFFFFF);
1004
1005    PACK0(ib, PA_CL_CLIP_CNTL, 5);
1006    E32(ib, CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1007    E32(ib, FACE_bit);         // PA_SU_SC_MODE_CNTL
1008    E32(ib, VTX_XY_FMT_bit);   // PA_CL_VTE_CNTL
1009    E32(ib, 0);                // PA_CL_VS_OUT_CNTL
1010    E32(ib, 0);                // PA_CL_NANINF_CNTL
1011
1012    PACK0(ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1013    E32(ib, 0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL
1014    E32(ib, 0); // PA_SU_POLY_OFFSET_CLAMP
1015    E32(ib, 0); // PA_SU_POLY_OFFSET_FRONT_SCALE
1016    E32(ib, 0); // PA_SU_POLY_OFFSET_FRONT_OFFSET
1017    E32(ib, 0); // PA_SU_POLY_OFFSET_BACK_SCALE
1018    E32(ib, 0); // PA_SU_POLY_OFFSET_BACK_OFFSET
1019
1020    // SPI
1021    if (info->ChipFamily < CHIP_FAMILY_RV770)
1022	EREG(ib, R7xx_SPI_THREAD_GROUPING,        0);
1023    else
1024	EREG(ib, R7xx_SPI_THREAD_GROUPING,        (1 << PS_GROUPING_shift));
1025
1026    PACK0(ib, SPI_INPUT_Z, 4);
1027    E32(ib, 0); // SPI_INPUT_Z
1028    E32(ib, 0); // SPI_FOG_CNTL
1029    E32(ib, 0); // SPI_FOG_FUNC_SCALE
1030    E32(ib, 0); // SPI_FOG_FUNC_BIAS
1031
1032    END_BATCH();
1033
1034    // clear FS
1035    fs_conf.bo = accel_state->shaders_bo;
1036    fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1037
1038    // VGT
1039    BEGIN_BATCH(43);
1040    PACK0(ib, VGT_MAX_VTX_INDX, 4);
1041    E32(ib, 2048); /* XXX set to a reasonably large number of indices */ // VGT_MAX_VTX_INDX
1042    E32(ib, 0); // VGT_MIN_VTX_INDX
1043    E32(ib, 0); // VGT_INDX_OFFSET
1044    E32(ib, 0); // VGT_MULTI_PRIM_IB_RESET_INDX
1045
1046    EREG(ib, VGT_PRIMITIVEID_EN,                  0);
1047    EREG(ib, VGT_MULTI_PRIM_IB_RESET_EN,          0);
1048
1049    PACK0(ib, VGT_INSTANCE_STEP_RATE_0, 2);
1050    E32(ib, 0); // VGT_INSTANCE_STEP_RATE_0
1051    E32(ib, 0); // VGT_INSTANCE_STEP_RATE_1
1052
1053    PACK0(ib, PA_SU_POINT_SIZE, 17);
1054    E32(ib, 0); // PA_SU_POINT_SIZE
1055    E32(ib, 0); // PA_SU_POINT_MINMAX
1056    E32(ib, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1057    E32(ib, 0); // PA_SC_LINE_STIPPLE
1058    E32(ib, 0); // VGT_OUTPUT_PATH_CNTL
1059    E32(ib, 0); // VGT_HOS_CNTL
1060    E32(ib, 0); // VGT_HOS_MAX_TESS_LEVEL
1061    E32(ib, 0); // VGT_HOS_MIN_TESS_LEVEL
1062    E32(ib, 0); // VGT_HOS_REUSE_DEPTH
1063    E32(ib, 0); // VGT_GROUP_PRIM_TYPE
1064    E32(ib, 0); // VGT_GROUP_FIRST_DECR
1065    E32(ib, 0); // VGT_GROUP_DECR
1066    E32(ib, 0); // VGT_GROUP_VECT_0_CNTL
1067    E32(ib, 0); // VGT_GROUP_VECT_1_CNTL
1068    E32(ib, 0); // VGT_GROUP_VECT_0_FMT_CNTL
1069    E32(ib, 0); // VGT_GROUP_VECT_1_FMT_CNTL
1070    E32(ib, 0); // VGT_GS_MODE
1071
1072    PACK0(ib, VGT_STRMOUT_EN, 3);
1073    E32(ib, 0); // VGT_STRMOUT_EN
1074    E32(ib, 0); // VGT_REUSE_OFF
1075    E32(ib, 0); // VGT_VTX_CNT_EN
1076
1077    EREG(ib, VGT_STRMOUT_BUFFER_EN,               0);
1078    END_BATCH();
1079}
1080
1081
1082/*
1083 * Commands
1084 */
1085
1086void
1087draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
1088{
1089    RADEONInfoPtr info = RADEONPTR(pScrn);
1090    uint32_t i, count;
1091
1092    // calculate num of packets
1093    count = 2;
1094    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
1095	count += (draw_conf->num_indices + 1) / 2;
1096    else
1097	count += draw_conf->num_indices;
1098
1099    BEGIN_BATCH(8 + count);
1100    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1101    PACK3(ib, IT_INDEX_TYPE, 1);
1102    E32(ib, draw_conf->index_type);
1103    PACK3(ib, IT_NUM_INSTANCES, 1);
1104    E32(ib, draw_conf->num_instances);
1105
1106    PACK3(ib, IT_DRAW_INDEX_IMMD, count);
1107    E32(ib, draw_conf->num_indices);
1108    E32(ib, draw_conf->vgt_draw_initiator);
1109
1110    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
1111	for (i = 0; i < draw_conf->num_indices; i += 2) {
1112	    if ((i + 1) == draw_conf->num_indices)
1113		E32(ib, indices[i]);
1114	    else
1115		E32(ib, (indices[i] | (indices[i + 1] << 16)));
1116	}
1117    } else {
1118	for (i = 0; i < draw_conf->num_indices; i++)
1119	    E32(ib, indices[i]);
1120    }
1121    END_BATCH();
1122}
1123
1124void
1125draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
1126{
1127    RADEONInfoPtr info = RADEONPTR(pScrn);
1128
1129    BEGIN_BATCH(10);
1130    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1131    PACK3(ib, IT_INDEX_TYPE, 1);
1132    E32(ib, draw_conf->index_type);
1133    PACK3(ib, IT_NUM_INSTANCES, 1);
1134    E32(ib, draw_conf->num_instances);
1135    PACK3(ib, IT_DRAW_INDEX_AUTO, 2);
1136    E32(ib, draw_conf->num_indices);
1137    E32(ib, draw_conf->vgt_draw_initiator);
1138    END_BATCH();
1139}
1140
1141Bool
1142r600_vb_get(ScrnInfoPtr pScrn)
1143{
1144    RADEONInfoPtr info = RADEONPTR(pScrn);
1145    struct radeon_accel_state *accel_state = info->accel_state;
1146
1147    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
1148	(accel_state->ib->idx*accel_state->ib->total)+
1149	(accel_state->ib->total / 2);
1150    accel_state->vb_total = (accel_state->ib->total / 2);
1151    accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address +
1152				    (accel_state->ib->total / 2));
1153    accel_state->vb_offset = 0;
1154    return TRUE;
1155}
1156
1157void
1158r600_vb_discard(ScrnInfoPtr pScrn)
1159{
1160    RADEONInfoPtr info = RADEONPTR(pScrn);
1161
1162    info->accel_state->vb_start_op = -1;
1163}
1164
1165
1166
1167int
1168r600_cp_start(ScrnInfoPtr pScrn)
1169{
1170    RADEONInfoPtr info = RADEONPTR(pScrn);
1171    struct radeon_accel_state *accel_state = info->accel_state;
1172
1173#if defined(XF86DRM_MODE)
1174    if (info->cs) {
1175	if (CS_FULL(info->cs)) {
1176	    radeon_cs_flush_indirect(pScrn);
1177	}
1178	accel_state->ib_reset_op = info->cs->cdw;
1179	accel_state->vb_start_op = accel_state->vb_offset;
1180    } else
1181#endif
1182    {
1183	accel_state->ib = RADEONCPGetBuffer(pScrn);
1184	if (!r600_vb_get(pScrn)) {
1185	    return -1;
1186	}
1187	accel_state->vb_start_op = accel_state->vb_offset;
1188    }
1189    return 0;
1190}
1191
1192void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1193{
1194    RADEONInfoPtr info = RADEONPTR(pScrn);
1195    struct radeon_accel_state *accel_state = info->accel_state;
1196    draw_config_t   draw_conf;
1197    vtx_resource_t  vtx_res;
1198
1199    if (accel_state->vb_start_op == -1)
1200	return;
1201
1202    CLEAR (draw_conf);
1203    CLEAR (vtx_res);
1204
1205    if (accel_state->vb_offset == accel_state->vb_start_op) {
1206        R600IBDiscard(pScrn, accel_state->ib);
1207	r600_vb_discard(pScrn);
1208	return;
1209    }
1210
1211    /* flush vertex cache */
1212    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
1213	(info->ChipFamily == CHIP_FAMILY_RV620) ||
1214	(info->ChipFamily == CHIP_FAMILY_RS780) ||
1215	(info->ChipFamily == CHIP_FAMILY_RS880) ||
1216	(info->ChipFamily == CHIP_FAMILY_RV710))
1217	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
1218			    accel_state->vb_offset, accel_state->vb_mc_addr,
1219			    accel_state->vb_bo,
1220			    RADEON_GEM_DOMAIN_GTT, 0);
1221    else
1222	cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
1223			    accel_state->vb_offset, accel_state->vb_mc_addr,
1224			    accel_state->vb_bo,
1225			    RADEON_GEM_DOMAIN_GTT, 0);
1226
1227    /* Vertex buffer setup */
1228    accel_state->vb_size = accel_state->vb_offset - accel_state->vb_start_op;
1229    vtx_res.id              = SQ_VTX_RESOURCE_vs;
1230    vtx_res.vtx_size_dw     = vtx_size / 4;
1231    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
1232    vtx_res.mem_req_size    = 1;
1233    vtx_res.vb_addr         = accel_state->vb_mc_addr + accel_state->vb_start_op;
1234    vtx_res.bo              = accel_state->vb_bo;
1235    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1236
1237    /* Draw */
1238    draw_conf.prim_type          = DI_PT_RECTLIST;
1239    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1240    draw_conf.num_instances      = 1;
1241    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1242    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1243
1244    draw_auto(pScrn, accel_state->ib, &draw_conf);
1245
1246    /* XXX drm should handle this in fence submit */
1247    wait_3d_idle_clean(pScrn, accel_state->ib);
1248
1249    /* sync dst surface */
1250    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1251			accel_state->dst_size, accel_state->dst_obj.offset,
1252			accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1253
1254    accel_state->vb_start_op = -1;
1255    accel_state->ib_reset_op = 0;
1256
1257#if KMS_MULTI_OP
1258    if (!info->cs)
1259#endif
1260	R600CPFlushIndirect(pScrn, accel_state->ib);
1261}
1262
1263void r600_vb_no_space(ScrnInfoPtr pScrn, int vert_size)
1264{
1265#ifdef XF86DRM_MODE
1266    RADEONInfoPtr info = RADEONPTR(pScrn);
1267    struct radeon_accel_state *accel_state = info->accel_state;
1268
1269    if (info->cs) {
1270	if (accel_state->vb_bo) {
1271	    if (accel_state->vb_start_op != accel_state->vb_offset) {
1272		r600_finish_op(pScrn, vert_size);
1273		accel_state->ib_reset_op = info->cs->cdw;
1274	    }
1275
1276	    /* release the current VBO */
1277	    radeon_vbo_put(pScrn);
1278	}
1279
1280	/* get a new one */
1281	radeon_vbo_get(pScrn);
1282	return;
1283    }
1284#endif
1285
1286    if (accel_state->vb_start_op != -1) {
1287	r600_finish_op(pScrn, vert_size);
1288	r600_cp_start(pScrn);
1289    }
1290}
1291