r6xx_accel.c revision de2362d3
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Alex Deucher <alexander.deucher@amd.com>
24 *          Matthias Hopf <mhopf@suse.de>
25 */
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
30#include "xf86.h"
31
32#include <errno.h>
33
34#include "radeon.h"
35#include "r600_shader.h"
36#include "radeon_reg.h"
37#include "r600_reg.h"
38#include "r600_state.h"
39
40#include "radeon_vbo.h"
41#include "radeon_exa_shared.h"
42
43static const uint32_t R600_ROP[16] = {
44    RADEON_ROP3_ZERO, /* GXclear        */
45    RADEON_ROP3_DSa,  /* Gxand          */
46    RADEON_ROP3_SDna, /* GXandReverse   */
47    RADEON_ROP3_S,    /* GXcopy         */
48    RADEON_ROP3_DSna, /* GXandInverted  */
49    RADEON_ROP3_D,    /* GXnoop         */
50    RADEON_ROP3_DSx,  /* GXxor          */
51    RADEON_ROP3_DSo,  /* GXor           */
52    RADEON_ROP3_DSon, /* GXnor          */
53    RADEON_ROP3_DSxn, /* GXequiv        */
54    RADEON_ROP3_Dn,   /* GXinvert       */
55    RADEON_ROP3_SDno, /* GXorReverse    */
56    RADEON_ROP3_Sn,   /* GXcopyInverted */
57    RADEON_ROP3_DSno, /* GXorInverted   */
58    RADEON_ROP3_DSan, /* GXnand         */
59    RADEON_ROP3_ONE,  /* GXset          */
60};
61
62/* we try and batch operations together under KMS -
63   but it doesn't work yet without misrendering */
64#define KMS_MULTI_OP 1
65
66/* Flush the indirect buffer to the kernel for submission to the card */
67void R600CPFlushIndirect(ScrnInfoPtr pScrn)
68{
69    radeon_cs_flush_indirect(pScrn);
70}
71
72void R600IBDiscard(ScrnInfoPtr pScrn)
73{
74    radeon_ib_discard(pScrn);
75}
76
77void
78r600_wait_3d_idle_clean(ScrnInfoPtr pScrn)
79{
80    RADEONInfoPtr info = RADEONPTR(pScrn);
81
82    //flush caches, don't generate timestamp
83    BEGIN_BATCH(5);
84    PACK3(IT_EVENT_WRITE, 1);
85    E32(CACHE_FLUSH_AND_INV_EVENT);
86    // wait for 3D idle clean
87    EREG(WAIT_UNTIL,                          (WAIT_3D_IDLE_bit |
88						   WAIT_3D_IDLECLEAN_bit));
89    END_BATCH();
90}
91
92void
93r600_wait_3d_idle(ScrnInfoPtr pScrn)
94{
95    RADEONInfoPtr info = RADEONPTR(pScrn);
96
97    BEGIN_BATCH(3);
98    EREG(WAIT_UNTIL,                          WAIT_3D_IDLE_bit);
99    END_BATCH();
100}
101
102void
103r600_start_3d(ScrnInfoPtr pScrn)
104{
105    RADEONInfoPtr info = RADEONPTR(pScrn);
106
107    if (info->ChipFamily < CHIP_FAMILY_RV770) {
108	BEGIN_BATCH(5);
109	PACK3(IT_START_3D_CMDBUF, 1);
110	E32(0);
111    } else
112	BEGIN_BATCH(3);
113
114    PACK3(IT_CONTEXT_CONTROL, 2);
115    E32(0x80000000);
116    E32(0x80000000);
117    END_BATCH();
118
119}
120
121/*
122 * Setup of functional groups
123 */
124
125// asic stack/thread/gpr limits - need to query the drm
126static void
127r600_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
128{
129    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
130    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
131    RADEONInfoPtr info = RADEONPTR(pScrn);
132
133    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
134	(info->ChipFamily == CHIP_FAMILY_RV620) ||
135	(info->ChipFamily == CHIP_FAMILY_RS780) ||
136	(info->ChipFamily == CHIP_FAMILY_RS880) ||
137	(info->ChipFamily == CHIP_FAMILY_RV710))
138	sq_config = 0;						// no VC
139    else
140	sq_config = VC_ENABLE_bit;
141
142    sq_config |= (DX9_CONSTS_bit |
143		  ALU_INST_PREFER_VECTOR_bit |
144		  (sq_conf->ps_prio << PS_PRIO_shift) |
145		  (sq_conf->vs_prio << VS_PRIO_shift) |
146		  (sq_conf->gs_prio << GS_PRIO_shift) |
147		  (sq_conf->es_prio << ES_PRIO_shift));
148
149    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
150			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
151			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
152    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
153			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
154
155    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
156			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
157			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
158			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
159
160    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
161				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
162
163    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
164				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
165
166    BEGIN_BATCH(8);
167    PACK0(SQ_CONFIG, 6);
168    E32(sq_config);
169    E32(sq_gpr_resource_mgmt_1);
170    E32(sq_gpr_resource_mgmt_2);
171    E32(sq_thread_resource_mgmt);
172    E32(sq_stack_resource_mgmt_1);
173    E32(sq_stack_resource_mgmt_2);
174    END_BATCH();
175}
176
177void
178r600_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
179{
180    uint32_t cb_color_info, cb_color_control;
181    unsigned pitch, slice, h, array_mode;
182    RADEONInfoPtr info = RADEONPTR(pScrn);
183
184
185    if (cb_conf->surface) {
186	switch (cb_conf->surface->level[0].mode) {
187	case RADEON_SURF_MODE_1D:
188		array_mode = 2;
189		break;
190	case RADEON_SURF_MODE_2D:
191		array_mode = 4;
192		break;
193	default:
194		array_mode = 0;
195		break;
196	}
197	pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
198	slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
199    } else
200    {
201	array_mode = cb_conf->array_mode;
202	pitch = (cb_conf->w / 8) - 1;
203	h = RADEON_ALIGN(cb_conf->h, 8);
204	slice = ((cb_conf->w * h) / 64) - 1;
205    }
206
207    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
208		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
209		     (array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
210		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
211		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
212		     (cb_conf->tile_mode   << CB_COLOR0_INFO__TILE_MODE_shift));
213    if (cb_conf->read_size)
214	cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
215    if (cb_conf->blend_clamp)
216	cb_color_info |= BLEND_CLAMP_bit;
217    if (cb_conf->clear_color)
218	cb_color_info |= CLEAR_COLOR_bit;
219    if (cb_conf->blend_bypass)
220	cb_color_info |= BLEND_BYPASS_bit;
221    if (cb_conf->blend_float32)
222	cb_color_info |= BLEND_FLOAT32_bit;
223    if (cb_conf->simple_float)
224	cb_color_info |= SIMPLE_FLOAT_bit;
225    if (cb_conf->round_mode)
226	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
227    if (cb_conf->tile_compact)
228	cb_color_info |= TILE_COMPACT_bit;
229    if (cb_conf->source_format)
230	cb_color_info |= SOURCE_FORMAT_bit;
231
232    BEGIN_BATCH(3 + 2);
233    EREG((CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
234    RELOC_BATCH(cb_conf->bo, 0, domain);
235    END_BATCH();
236
237    // rv6xx workaround
238    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
239        (info->ChipFamily < CHIP_FAMILY_RV770)) {
240        BEGIN_BATCH(2);
241        PACK3(IT_SURFACE_BASE_UPDATE, 1);
242        E32((2 << cb_conf->id));
243        END_BATCH();
244    }
245    /* Set CMASK & TILE buffer to the offset of color buffer as
246     * we don't use those this shouldn't cause any issue and we
247     * then have a valid cmd stream
248     */
249    BEGIN_BATCH(3 + 2);
250    EREG((CB_COLOR0_TILE + (4 * cb_conf->id)), (0     >> 8));	// CMASK per-tile data base/256
251    RELOC_BATCH(cb_conf->bo, 0, domain);
252    END_BATCH();
253    BEGIN_BATCH(3 + 2);
254    EREG((CB_COLOR0_FRAG + (4 * cb_conf->id)), (0     >> 8));	// FMASK per-tile data base/256
255    RELOC_BATCH(cb_conf->bo, 0, domain);
256    END_BATCH();
257    BEGIN_BATCH(9);
258    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
259    EREG((CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift)	|
260						    (slice << SLICE_TILE_MAX_shift)));
261    EREG((CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0    << SLICE_START_shift)		|
262						    (0    << SLICE_MAX_shift)));
263    EREG((CB_COLOR0_MASK + (4 * cb_conf->id)), ((0    << CMASK_BLOCK_MAX_shift)	|
264						    (0    << FMASK_TILE_MAX_shift)));
265    END_BATCH();
266
267    BEGIN_BATCH(3 + 2);
268    EREG((CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
269    RELOC_BATCH(cb_conf->bo, 0, domain);
270    END_BATCH();
271
272    BEGIN_BATCH(9);
273    EREG(CB_TARGET_MASK,          (cb_conf->pmask << TARGET0_ENABLE_shift));
274    cb_color_control = R600_ROP[cb_conf->rop] |
275	(cb_conf->blend_enable << TARGET_BLEND_ENABLE_shift);
276    if (info->ChipFamily == CHIP_FAMILY_R600) {
277	/* no per-MRT blend on R600 */
278	EREG(CB_COLOR_CONTROL,    cb_color_control);
279	EREG(CB_BLEND_CONTROL,    cb_conf->blendcntl);
280    } else {
281	if (cb_conf->blend_enable)
282	    cb_color_control |= PER_MRT_BLEND_bit;
283	EREG(CB_COLOR_CONTROL,    cb_color_control);
284	EREG(CB_BLEND0_CONTROL,   cb_conf->blendcntl);
285    }
286    END_BATCH();
287}
288
289static void
290r600_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
291			 uint32_t size, uint64_t mc_addr,
292			 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
293{
294    RADEONInfoPtr info = RADEONPTR(pScrn);
295    uint32_t cp_coher_size;
296    if (size == 0xffffffff)
297	cp_coher_size = 0xffffffff;
298    else
299	cp_coher_size = ((size + 255) >> 8);
300
301    BEGIN_BATCH(5 + 2);
302    PACK3(IT_SURFACE_SYNC, 4);
303    E32(sync_type);
304    E32(cp_coher_size);
305    E32((mc_addr >> 8));
306    E32(10); /* poll interval */
307    RELOC_BATCH(bo, rdomains, wdomain);
308    END_BATCH();
309}
310
311/* inserts a wait for vline in the command stream */
312void
313r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
314			xf86CrtcPtr crtc, int start, int stop)
315{
316    RADEONInfoPtr  info = RADEONPTR(pScrn);
317    drmmode_crtc_private_ptr drmmode_crtc;
318
319    if (!crtc)
320        return;
321
322    if (!crtc->enabled)
323        return;
324
325    if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
326        return;
327
328    start = max(start, crtc->y);
329    stop = min(stop, crtc->y + crtc->mode.VDisplay);
330
331    if (start >= stop)
332        return;
333
334    drmmode_crtc = crtc->driver_private;
335
336    BEGIN_BATCH(11);
337    /* set the VLINE range */
338    EREG(AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */
339	 (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
340	 (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
341
342    /* tell the CP to poll the VLINE state register */
343    PACK3(IT_WAIT_REG_MEM, 6);
344    E32(IT_WAIT_REG | IT_WAIT_EQ);
345    E32(IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS));
346    E32(0);
347    E32(0);                          // Ref value
348    E32(AVIVO_D1MODE_VLINE_STAT);    // Mask
349    E32(10);                         // Wait interval
350    /* add crtc reloc */
351    PACK3(IT_NOP, 1);
352    E32(drmmode_crtc->mode_crtc->crtc_id);
353    END_BATCH();
354}
355
356void
357r600_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
358{
359    RADEONInfoPtr info = RADEONPTR(pScrn);
360
361    BEGIN_BATCH(8);
362    /* Interpolator setup */
363    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
364    PACK0(SPI_PS_IN_CONTROL_0, 3);
365    E32((num_interp << NUM_INTERP_shift));
366    E32(0);
367    E32(0);
368    END_BATCH();
369}
370
371void
372r600_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
373{
374    RADEONInfoPtr info = RADEONPTR(pScrn);
375    uint32_t sq_pgm_resources;
376
377    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
378			(fs_conf->stack_size << STACK_SIZE_shift));
379
380    if (fs_conf->dx10_clamp)
381	sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;
382
383    BEGIN_BATCH(3 + 2);
384    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
385    RELOC_BATCH(fs_conf->bo, domain, 0);
386    END_BATCH();
387
388    BEGIN_BATCH(6);
389    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
390    EREG(SQ_PGM_CF_OFFSET_FS, 0);
391    END_BATCH();
392}
393
394void
395r600_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
396{
397    RADEONInfoPtr info = RADEONPTR(pScrn);
398    uint32_t sq_pgm_resources;
399
400    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
401			(vs_conf->stack_size << STACK_SIZE_shift));
402
403    if (vs_conf->dx10_clamp)
404	sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
405    if (vs_conf->fetch_cache_lines)
406	sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
407    if (vs_conf->uncached_first_inst)
408	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
409
410    /* flush SQ cache */
411    r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
412			     vs_conf->shader_size, vs_conf->shader_addr,
413			     vs_conf->bo, domain, 0);
414
415    BEGIN_BATCH(3 + 2);
416    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
417    RELOC_BATCH(vs_conf->bo, domain, 0);
418    END_BATCH();
419
420    BEGIN_BATCH(6);
421    EREG(SQ_PGM_RESOURCES_VS, sq_pgm_resources);
422    EREG(SQ_PGM_CF_OFFSET_VS, 0);
423    END_BATCH();
424}
425
426void
427r600_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
428{
429    RADEONInfoPtr info = RADEONPTR(pScrn);
430    uint32_t sq_pgm_resources;
431
432    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
433			(ps_conf->stack_size << STACK_SIZE_shift));
434
435    if (ps_conf->dx10_clamp)
436	sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
437    if (ps_conf->fetch_cache_lines)
438	sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
439    if (ps_conf->uncached_first_inst)
440	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
441    if (ps_conf->clamp_consts)
442	sq_pgm_resources |= CLAMP_CONSTS_bit;
443
444    /* flush SQ cache */
445    r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
446			     ps_conf->shader_size, ps_conf->shader_addr,
447			     ps_conf->bo, domain, 0);
448
449    BEGIN_BATCH(3 + 2);
450    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
451    RELOC_BATCH(ps_conf->bo, domain, 0);
452    END_BATCH();
453
454    BEGIN_BATCH(9);
455    EREG(SQ_PGM_RESOURCES_PS, sq_pgm_resources);
456    EREG(SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
457    EREG(SQ_PGM_CF_OFFSET_PS, 0);
458    END_BATCH();
459}
460
461void
462r600_set_alu_consts(ScrnInfoPtr pScrn, int offset, int count, float *const_buf)
463{
464    RADEONInfoPtr info = RADEONPTR(pScrn);
465    int i;
466    const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2);
467
468    BEGIN_BATCH(2 + countreg);
469    PACK0(SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg);
470    for (i = 0; i < countreg; i++)
471	EFLOAT(const_buf[i]);
472    END_BATCH();
473}
474
475void
476r600_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
477{
478    RADEONInfoPtr info = RADEONPTR(pScrn);
479    /* bool register order is: ps, vs, gs; one register each
480     * 1 bits per bool; 32 bools each for ps, vs, gs.
481     */
482    BEGIN_BATCH(3);
483    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
484    END_BATCH();
485}
486
487static void
488r600_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
489{
490    RADEONInfoPtr info = RADEONPTR(pScrn);
491    struct radeon_accel_state *accel_state = info->accel_state;
492    uint32_t sq_vtx_constant_word2;
493
494    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
495			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
496			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
497			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
498			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
499    if (res->clamp_x)
500	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
501
502    if (res->format_comp_all)
503	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
504
505    if (res->srf_mode_all)
506	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
507
508    /* flush vertex cache */
509    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
510	(info->ChipFamily == CHIP_FAMILY_RV620) ||
511	(info->ChipFamily == CHIP_FAMILY_RS780) ||
512	(info->ChipFamily == CHIP_FAMILY_RS880) ||
513	(info->ChipFamily == CHIP_FAMILY_RV710))
514	r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
515				 accel_state->vbo.vb_offset, 0,
516				 res->bo,
517				 domain, 0);
518    else
519	r600_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
520				 accel_state->vbo.vb_offset, 0,
521				 res->bo,
522				 domain, 0);
523
524    BEGIN_BATCH(9 + 2);
525    PACK0(SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
526    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
527    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
528    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
529    E32(res->mem_req_size << MEM_REQUEST_SIZE_shift);		// 3: MEM_REQUEST_SIZE ?!?
530    E32(0);							// 4: n/a
531    E32(0);							// 5: n/a
532    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift);	// 6: TYPE
533    RELOC_BATCH(res->bo, domain, 0);
534    END_BATCH();
535}
536
537void
538r600_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
539{
540    RADEONInfoPtr info = RADEONPTR(pScrn);
541    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
542    uint32_t sq_tex_resource_word5, sq_tex_resource_word6;
543    uint32_t array_mode, pitch;
544
545    if (tex_res->surface) {
546	switch (tex_res->surface->level[0].mode) {
547	case RADEON_SURF_MODE_1D:
548		array_mode = 2;
549		break;
550	case RADEON_SURF_MODE_2D:
551		array_mode = 4;
552		break;
553	default:
554		array_mode = 0;
555		break;
556	}
557	pitch = tex_res->surface->level[0].nblk_x >> 3;
558    } else
559    {
560	array_mode = tex_res->tile_mode;
561	pitch = (tex_res->pitch + 7) >> 3;
562    }
563
564    sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
565		     (array_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));
566
567    if (tex_res->w)
568	sq_tex_resource_word0 |= (((pitch - 1) << PITCH_shift) |
569				  ((tex_res->w - 1) << TEX_WIDTH_shift));
570
571    if (tex_res->tile_type)
572	sq_tex_resource_word0 |= TILE_TYPE_bit;
573
574    sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);
575
576    if (tex_res->h)
577	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
578    if (tex_res->depth)
579	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
580
581    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
582			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
583			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
584			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
585			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
586			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
587			     (tex_res->request_size << REQUEST_SIZE_shift) |
588			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
589			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
590			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
591			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
592			     (tex_res->base_level << BASE_LEVEL_shift));
593
594    if (tex_res->srf_mode_all)
595	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
596    if (tex_res->force_degamma)
597	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
598
599    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
600			     (tex_res->base_array << BASE_ARRAY_shift) |
601			     (tex_res->last_array << LAST_ARRAY_shift));
602
603    sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
604			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
605			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));
606
607    if (tex_res->interlaced)
608	sq_tex_resource_word6 |= INTERLACED_bit;
609
610    /* flush texture cache */
611    r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
612			     tex_res->size, tex_res->base,
613			     tex_res->bo, domain, 0);
614
615    BEGIN_BATCH(9 + 4);
616    PACK0(SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
617    E32(sq_tex_resource_word0);
618    E32(sq_tex_resource_word1);
619    E32(((tex_res->base) >> 8));
620    E32(((tex_res->mip_base) >> 8));
621    E32(sq_tex_resource_word4);
622    E32(sq_tex_resource_word5);
623    E32(sq_tex_resource_word6);
624    RELOC_BATCH(tex_res->bo, domain, 0);
625    RELOC_BATCH(tex_res->mip_bo, domain, 0);
626    END_BATCH();
627}
628
629void
630r600_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
631{
632    RADEONInfoPtr info = RADEONPTR(pScrn);
633    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
634
635    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
636			    (s->clamp_y       << CLAMP_Y_shift)					|
637			    (s->clamp_z       << CLAMP_Z_shift)					|
638			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
639			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
640			    (s->z_filter      << Z_FILTER_shift)	|
641			    (s->mip_filter    << MIP_FILTER_shift)				|
642			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
643			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
644			    (s->chroma_key    << CHROMA_KEY_shift));
645    if (s->point_sampling_clamp)
646	sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
647    if (s->tex_array_override)
648	sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
649    if (s->lod_uses_minor_axis)
650	sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;
651
652    sq_tex_sampler_word1 = ((s->min_lod       << MIN_LOD_shift)					|
653			    (s->max_lod       << MAX_LOD_shift)					|
654			    (s->lod_bias      << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));
655
656    sq_tex_sampler_word2 = ((s->lod_bias2     << LOD_BIAS_SEC_shift)	|
657			    (s->perf_mip      << PERF_MIP_shift)	|
658			    (s->perf_z        << PERF_Z_shift));
659    if (s->mc_coord_truncate)
660	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
661    if (s->force_degamma)
662	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
663    if (s->high_precision_filter)
664	sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
665    if (s->fetch_4)
666	sq_tex_sampler_word2 |= FETCH_4_bit;
667    if (s->sample_is_pcf)
668	sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
669    if (s->type)
670	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
671
672    BEGIN_BATCH(5);
673    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
674    E32(sq_tex_sampler_word0);
675    E32(sq_tex_sampler_word1);
676    E32(sq_tex_sampler_word2);
677    END_BATCH();
678}
679
680//XXX deal with clip offsets in clip setup
681void
682r600_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
683{
684    RADEONInfoPtr info = RADEONPTR(pScrn);
685
686    BEGIN_BATCH(4);
687    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
688    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
689	     (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
690    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
691	     (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
692    END_BATCH();
693}
694
695void
696r600_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
697{
698    RADEONInfoPtr info = RADEONPTR(pScrn);
699
700    BEGIN_BATCH(4);
701    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
702    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
703	     (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
704	     WINDOW_OFFSET_DISABLE_bit));
705    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
706	     (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
707    END_BATCH();
708}
709
710void
711r600_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
712{
713    RADEONInfoPtr info = RADEONPTR(pScrn);
714
715    BEGIN_BATCH(4);
716    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
717    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
718	     (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
719	     WINDOW_OFFSET_DISABLE_bit));
720    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
721	     (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
722    END_BATCH();
723}
724
725void
726r600_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
727{
728    RADEONInfoPtr info = RADEONPTR(pScrn);
729
730    BEGIN_BATCH(4);
731    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
732    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
733	     (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
734	     WINDOW_OFFSET_DISABLE_bit));
735    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
736	      (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
737    END_BATCH();
738}
739
740void
741r600_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
742{
743    RADEONInfoPtr info = RADEONPTR(pScrn);
744
745    BEGIN_BATCH(4);
746    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
747    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
748	     (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
749    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
750	     (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
751    END_BATCH();
752}
753
754/*
755 * Setup of default state
756 */
757
758void
759r600_set_default_state(ScrnInfoPtr pScrn)
760{
761    tex_resource_t tex_res;
762    shader_config_t fs_conf;
763    sq_config_t sq_conf;
764    int i;
765    RADEONInfoPtr info = RADEONPTR(pScrn);
766    struct radeon_accel_state *accel_state = info->accel_state;
767
768    if (accel_state->XInited3D)
769	return;
770
771    memset(&tex_res, 0, sizeof(tex_resource_t));
772    memset(&fs_conf, 0, sizeof(shader_config_t));
773
774    accel_state->XInited3D = TRUE;
775
776    r600_start_3d(pScrn);
777
778    // SQ
779    sq_conf.ps_prio = 0;
780    sq_conf.vs_prio = 1;
781    sq_conf.gs_prio = 2;
782    sq_conf.es_prio = 3;
783    // need to set stack/thread/gpr limits based on the asic
784    // for now just set them low enough so any card will work
785    // see r600_cp.c in the drm
786    switch (info->ChipFamily) {
787    case CHIP_FAMILY_R600:
788	sq_conf.num_ps_gprs = 192;
789	sq_conf.num_vs_gprs = 56;
790	sq_conf.num_temp_gprs = 4;
791	sq_conf.num_gs_gprs = 0;
792	sq_conf.num_es_gprs = 0;
793	sq_conf.num_ps_threads = 136;
794	sq_conf.num_vs_threads = 48;
795	sq_conf.num_gs_threads = 4;
796	sq_conf.num_es_threads = 4;
797	sq_conf.num_ps_stack_entries = 128;
798	sq_conf.num_vs_stack_entries = 128;
799	sq_conf.num_gs_stack_entries = 0;
800	sq_conf.num_es_stack_entries = 0;
801	break;
802    case CHIP_FAMILY_RV630:
803    case CHIP_FAMILY_RV635:
804	sq_conf.num_ps_gprs = 84;
805	sq_conf.num_vs_gprs = 36;
806	sq_conf.num_temp_gprs = 4;
807	sq_conf.num_gs_gprs = 0;
808	sq_conf.num_es_gprs = 0;
809	sq_conf.num_ps_threads = 144;
810	sq_conf.num_vs_threads = 40;
811	sq_conf.num_gs_threads = 4;
812	sq_conf.num_es_threads = 4;
813	sq_conf.num_ps_stack_entries = 40;
814	sq_conf.num_vs_stack_entries = 40;
815	sq_conf.num_gs_stack_entries = 32;
816	sq_conf.num_es_stack_entries = 16;
817	break;
818    case CHIP_FAMILY_RV610:
819    case CHIP_FAMILY_RV620:
820    case CHIP_FAMILY_RS780:
821    case CHIP_FAMILY_RS880:
822    default:
823	sq_conf.num_ps_gprs = 84;
824	sq_conf.num_vs_gprs = 36;
825	sq_conf.num_temp_gprs = 4;
826	sq_conf.num_gs_gprs = 0;
827	sq_conf.num_es_gprs = 0;
828	sq_conf.num_ps_threads = 136;
829	sq_conf.num_vs_threads = 48;
830	sq_conf.num_gs_threads = 4;
831	sq_conf.num_es_threads = 4;
832	sq_conf.num_ps_stack_entries = 40;
833	sq_conf.num_vs_stack_entries = 40;
834	sq_conf.num_gs_stack_entries = 32;
835	sq_conf.num_es_stack_entries = 16;
836	break;
837    case CHIP_FAMILY_RV670:
838	sq_conf.num_ps_gprs = 144;
839	sq_conf.num_vs_gprs = 40;
840	sq_conf.num_temp_gprs = 4;
841	sq_conf.num_gs_gprs = 0;
842	sq_conf.num_es_gprs = 0;
843	sq_conf.num_ps_threads = 136;
844	sq_conf.num_vs_threads = 48;
845	sq_conf.num_gs_threads = 4;
846	sq_conf.num_es_threads = 4;
847	sq_conf.num_ps_stack_entries = 40;
848	sq_conf.num_vs_stack_entries = 40;
849	sq_conf.num_gs_stack_entries = 32;
850	sq_conf.num_es_stack_entries = 16;
851	break;
852    case CHIP_FAMILY_RV770:
853	sq_conf.num_ps_gprs = 192;
854	sq_conf.num_vs_gprs = 56;
855	sq_conf.num_temp_gprs = 4;
856	sq_conf.num_gs_gprs = 0;
857	sq_conf.num_es_gprs = 0;
858	sq_conf.num_ps_threads = 188;
859	sq_conf.num_vs_threads = 60;
860	sq_conf.num_gs_threads = 0;
861	sq_conf.num_es_threads = 0;
862	sq_conf.num_ps_stack_entries = 256;
863	sq_conf.num_vs_stack_entries = 256;
864	sq_conf.num_gs_stack_entries = 0;
865	sq_conf.num_es_stack_entries = 0;
866	break;
867    case CHIP_FAMILY_RV730:
868    case CHIP_FAMILY_RV740:
869	sq_conf.num_ps_gprs = 84;
870	sq_conf.num_vs_gprs = 36;
871	sq_conf.num_temp_gprs = 4;
872	sq_conf.num_gs_gprs = 0;
873	sq_conf.num_es_gprs = 0;
874	sq_conf.num_ps_threads = 188;
875	sq_conf.num_vs_threads = 60;
876	sq_conf.num_gs_threads = 0;
877	sq_conf.num_es_threads = 0;
878	sq_conf.num_ps_stack_entries = 128;
879	sq_conf.num_vs_stack_entries = 128;
880	sq_conf.num_gs_stack_entries = 0;
881	sq_conf.num_es_stack_entries = 0;
882	break;
883    case CHIP_FAMILY_RV710:
884	sq_conf.num_ps_gprs = 192;
885	sq_conf.num_vs_gprs = 56;
886	sq_conf.num_temp_gprs = 4;
887	sq_conf.num_gs_gprs = 0;
888	sq_conf.num_es_gprs = 0;
889	sq_conf.num_ps_threads = 144;
890	sq_conf.num_vs_threads = 48;
891	sq_conf.num_gs_threads = 0;
892	sq_conf.num_es_threads = 0;
893	sq_conf.num_ps_stack_entries = 128;
894	sq_conf.num_vs_stack_entries = 128;
895	sq_conf.num_gs_stack_entries = 0;
896	sq_conf.num_es_stack_entries = 0;
897	break;
898    }
899
900    r600_sq_setup(pScrn, &sq_conf);
901
902    /* set fake reloc for unused depth */
903    BEGIN_BATCH(3 + 2);
904    EREG(DB_DEPTH_INFO, 0);
905    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
906    END_BATCH();
907
908    BEGIN_BATCH(80);
909    if (info->ChipFamily < CHIP_FAMILY_RV770) {
910	EREG(TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) |
911			       (28 << TD_FIFO_CREDIT_shift)));
912	EREG(VC_ENHANCE, 0);
913	EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
914	EREG(DB_DEBUG, 0x82000000); /* ? */
915	EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
916				 (16 << DEPTH_FLUSH_shift) |
917				 (0 << FORCE_SUMMARIZE_shift) |
918				 (4 << DEPTH_PENDING_FREE_shift) |
919				 (16 << DEPTH_CACHELINE_FREE_shift) |
920				 0));
921    } else {
922	EREG(TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) |
923			       (28 << TD_FIFO_CREDIT_shift)));
924	EREG(VC_ENHANCE, 0);
925	EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
926	EREG(DB_DEBUG, 0);
927	EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
928				 (16 << DEPTH_FLUSH_shift) |
929				 (0 << FORCE_SUMMARIZE_shift) |
930				 (4 << DEPTH_PENDING_FREE_shift) |
931				 (4 << DEPTH_CACHELINE_FREE_shift) |
932				 0));
933    }
934
935    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
936    E32(0);
937    E32(0);
938
939    PACK0(SQ_ESGS_RING_ITEMSIZE, 9);
940    E32(0); // SQ_ESGS_RING_ITEMSIZE
941    E32(0); // SQ_GSVS_RING_ITEMSIZE
942    E32(0); // SQ_ESTMP_RING_ITEMSIZE
943    E32(0); // SQ_GSTMP_RING_ITEMSIZE
944    E32(0); // SQ_VSTMP_RING_ITEMSIZE
945    E32(0); // SQ_PSTMP_RING_ITEMSIZE
946    E32(0); // SQ_FBUF_RING_ITEMSIZE
947    E32(0); // SQ_REDUC_RING_ITEMSIZE
948    E32(0); // SQ_GS_VERT_ITEMSIZE
949
950    // DB
951    EREG(DB_DEPTH_CONTROL,                    0);
952    PACK0(DB_RENDER_CONTROL, 2);
953    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
954    if (info->ChipFamily < CHIP_FAMILY_RV770)
955	E32(FORCE_SHADER_Z_ORDER_bit);
956    else
957	E32(0);
958    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
959						   (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
960						   (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
961						   (2 << ALPHA_TO_MASK_OFFSET3_shift)));
962    EREG(DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
963				 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
964
965    PACK0(DB_STENCIL_CLEAR, 2);
966    E32(0); // DB_STENCIL_CLEAR
967    E32(0); // DB_DEPTH_CLEAR
968
969    PACK0(DB_STENCILREFMASK, 3);
970    E32(0); // DB_STENCILREFMASK
971    E32(0); // DB_STENCILREFMASK_BF
972    E32(0); // SX_ALPHA_REF
973
974    PACK0(CB_CLRCMP_CONTROL, 4);
975    E32(1 << CLRCMP_FCN_SEL_shift);				// CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
976    E32(0);							// CB_CLRCMP_SRC
977    E32(0);							// CB_CLRCMP_DST
978    E32(0);							// CB_CLRCMP_MSK
979
980    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
981    EREG(R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
982
983    PACK0(SX_ALPHA_TEST_CONTROL, 5);
984    E32(0); // SX_ALPHA_TEST_CONTROL
985    E32(0x00000000); // CB_BLEND_RED
986    E32(0x00000000); // CB_BLEND_GREEN
987    E32(0x00000000); // CB_BLEND_BLUE
988    E32(0x00000000); // CB_BLEND_ALPHA
989
990    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
991						   (0 << WINDOW_Y_OFFSET_shift)));
992
993    if (info->ChipFamily < CHIP_FAMILY_RV770)
994	EREG(R7xx_PA_SC_EDGERULE,             0x00000000);
995    else
996	EREG(R7xx_PA_SC_EDGERULE,             0xAAAAAAAA);
997
998    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
999
1000    END_BATCH();
1001
1002    /* clip boolean is set to always visible -> doesn't matter */
1003    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1004	r600_set_clip_rect(pScrn, i, 0, 0, 8192, 8192);
1005
1006    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1007	r600_set_vport_scissor(pScrn, i, 0, 0, 8192, 8192);
1008
1009    BEGIN_BATCH(49);
1010    PACK0(PA_SC_MPASS_PS_CNTL, 2);
1011    E32(0);
1012    if (info->ChipFamily < CHIP_FAMILY_RV770)
1013	E32((WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
1014    else
1015	E32((FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
1016		 0x00500000)); /* ? */
1017
1018    PACK0(PA_SC_LINE_CNTL, 9);
1019    E32(0); // PA_SC_LINE_CNTL
1020    E32(0); // PA_SC_AA_CONFIG
1021    E32(((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL
1022	     (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
1023    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1024    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1025    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1026    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1027    E32(0);                                                 // PA_SC_AA_SAMPLE_LOCS_MCTX
1028    E32(0);                                                 // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M
1029
1030    EREG(PA_SC_AA_MASK,                       0xFFFFFFFF);
1031
1032    PACK0(PA_CL_CLIP_CNTL, 5);
1033    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1034    E32(FACE_bit);         // PA_SU_SC_MODE_CNTL
1035    E32(VTX_XY_FMT_bit);   // PA_CL_VTE_CNTL
1036    E32(0);                // PA_CL_VS_OUT_CNTL
1037    E32(0);                // PA_CL_NANINF_CNTL
1038
1039    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1040    E32(0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL
1041    E32(0); // PA_SU_POLY_OFFSET_CLAMP
1042    E32(0); // PA_SU_POLY_OFFSET_FRONT_SCALE
1043    E32(0); // PA_SU_POLY_OFFSET_FRONT_OFFSET
1044    E32(0); // PA_SU_POLY_OFFSET_BACK_SCALE
1045    E32(0); // PA_SU_POLY_OFFSET_BACK_OFFSET
1046
1047    // SPI
1048    if (info->ChipFamily < CHIP_FAMILY_RV770)
1049	EREG(R7xx_SPI_THREAD_GROUPING,        0);
1050    else
1051	EREG(R7xx_SPI_THREAD_GROUPING,        (1 << PS_GROUPING_shift));
1052
1053    /* default Interpolator setup */
1054    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1055			       (1 << SEMANTIC_1_shift)));
1056    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1057    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1058    E32(((0    << SEMANTIC_shift)	|
1059	     (0x01 << DEFAULT_VAL_shift)	|
1060	     SEL_CENTROID_bit));
1061    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1062    E32(((1    << SEMANTIC_shift)	|
1063	     (0x01 << DEFAULT_VAL_shift)	|
1064	     SEL_CENTROID_bit));
1065
1066    PACK0(SPI_INPUT_Z, 4);
1067    E32(0); // SPI_INPUT_Z
1068    E32(0); // SPI_FOG_CNTL
1069    E32(0); // SPI_FOG_FUNC_SCALE
1070    E32(0); // SPI_FOG_FUNC_BIAS
1071
1072    END_BATCH();
1073
1074    // clear FS
1075    fs_conf.bo = accel_state->shaders_bo;
1076    r600_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1077
1078    // VGT
1079    BEGIN_BATCH(46);
1080    PACK0(VGT_MAX_VTX_INDX, 4);
1081    E32(0xffffff); // VGT_MAX_VTX_INDX
1082    E32(0); // VGT_MIN_VTX_INDX
1083    E32(0); // VGT_INDX_OFFSET
1084    E32(0); // VGT_MULTI_PRIM_IB_RESET_INDX
1085
1086    EREG(VGT_PRIMITIVEID_EN,                  0);
1087    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1088
1089    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1090    E32(0); // VGT_INSTANCE_STEP_RATE_0
1091    E32(0); // VGT_INSTANCE_STEP_RATE_1
1092
1093    PACK0(PA_SU_POINT_SIZE, 17);
1094    E32(0); // PA_SU_POINT_SIZE
1095    E32(0); // PA_SU_POINT_MINMAX
1096    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1097    E32(0); // PA_SC_LINE_STIPPLE
1098    E32(0); // VGT_OUTPUT_PATH_CNTL
1099    E32(0); // VGT_HOS_CNTL
1100    E32(0); // VGT_HOS_MAX_TESS_LEVEL
1101    E32(0); // VGT_HOS_MIN_TESS_LEVEL
1102    E32(0); // VGT_HOS_REUSE_DEPTH
1103    E32(0); // VGT_GROUP_PRIM_TYPE
1104    E32(0); // VGT_GROUP_FIRST_DECR
1105    E32(0); // VGT_GROUP_DECR
1106    E32(0); // VGT_GROUP_VECT_0_CNTL
1107    E32(0); // VGT_GROUP_VECT_1_CNTL
1108    E32(0); // VGT_GROUP_VECT_0_FMT_CNTL
1109    E32(0); // VGT_GROUP_VECT_1_FMT_CNTL
1110    E32(0); // VGT_GS_MODE
1111
1112    PACK0(VGT_STRMOUT_EN, 3);
1113    E32(0); // VGT_STRMOUT_EN
1114    E32(0); // VGT_REUSE_OFF
1115    E32(0); // VGT_VTX_CNT_EN
1116
1117    EREG(VGT_STRMOUT_BUFFER_EN,               0);
1118    EREG(SX_MISC,                             0);
1119    END_BATCH();
1120}
1121
1122
1123/*
1124 * Commands
1125 */
1126
1127void
1128r600_draw_immd(ScrnInfoPtr pScrn, draw_config_t *draw_conf, uint32_t *indices)
1129{
1130    RADEONInfoPtr info = RADEONPTR(pScrn);
1131    uint32_t i, count;
1132
1133    // calculate num of packets
1134    count = 2;
1135    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
1136	count += (draw_conf->num_indices + 1) / 2;
1137    else
1138	count += draw_conf->num_indices;
1139
1140    BEGIN_BATCH(8 + count);
1141    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1142    PACK3(IT_INDEX_TYPE, 1);
1143#if X_BYTE_ORDER == X_BIG_ENDIAN
1144    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1145#else
1146    E32(draw_conf->index_type);
1147#endif
1148    PACK3(IT_NUM_INSTANCES, 1);
1149    E32(draw_conf->num_instances);
1150
1151    PACK3(IT_DRAW_INDEX_IMMD, count);
1152    E32(draw_conf->num_indices);
1153    E32(draw_conf->vgt_draw_initiator);
1154
1155    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
1156	for (i = 0; i < draw_conf->num_indices; i += 2) {
1157	    if ((i + 1) == draw_conf->num_indices)
1158		E32(indices[i]);
1159	    else
1160		E32((indices[i] | (indices[i + 1] << 16)));
1161	}
1162    } else {
1163	for (i = 0; i < draw_conf->num_indices; i++)
1164	    E32(indices[i]);
1165    }
1166    END_BATCH();
1167}
1168
1169void
1170r600_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1171{
1172    RADEONInfoPtr info = RADEONPTR(pScrn);
1173
1174    BEGIN_BATCH(10);
1175    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1176    PACK3(IT_INDEX_TYPE, 1);
1177#if X_BYTE_ORDER == X_BIG_ENDIAN
1178    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1179#else
1180    E32(draw_conf->index_type);
1181#endif
1182    PACK3(IT_NUM_INSTANCES, 1);
1183    E32(draw_conf->num_instances);
1184    PACK3(IT_DRAW_INDEX_AUTO, 2);
1185    E32(draw_conf->num_indices);
1186    E32(draw_conf->vgt_draw_initiator);
1187    END_BATCH();
1188}
1189
1190void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1191{
1192    RADEONInfoPtr info = RADEONPTR(pScrn);
1193    struct radeon_accel_state *accel_state = info->accel_state;
1194    draw_config_t   draw_conf;
1195    vtx_resource_t  vtx_res;
1196
1197    if (accel_state->vbo.vb_start_op == -1)
1198	return;
1199
1200    CLEAR (draw_conf);
1201    CLEAR (vtx_res);
1202
1203    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1204        R600IBDiscard(pScrn);
1205	return;
1206    }
1207
1208    /* Vertex buffer setup */
1209    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1210    vtx_res.id              = SQ_VTX_RESOURCE_vs;
1211    vtx_res.vtx_size_dw     = vtx_size / 4;
1212    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1213    vtx_res.mem_req_size    = 1;
1214    vtx_res.vb_addr         = accel_state->vbo.vb_start_op;
1215    vtx_res.bo              = accel_state->vbo.vb_bo;
1216#if X_BYTE_ORDER == X_BIG_ENDIAN
1217    vtx_res.endian          = SQ_ENDIAN_8IN32;
1218#endif
1219    r600_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1220
1221    /* Draw */
1222    draw_conf.prim_type          = DI_PT_RECTLIST;
1223    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1224    draw_conf.num_instances      = 1;
1225    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1226    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1227
1228    r600_draw_auto(pScrn, &draw_conf);
1229
1230    /* XXX drm should handle this in fence submit */
1231    r600_wait_3d_idle_clean(pScrn);
1232
1233    /* sync dst surface */
1234    r600_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1235			     accel_state->dst_size, 0,
1236			     accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1237
1238    accel_state->vbo.vb_start_op = -1;
1239    accel_state->ib_reset_op = 0;
1240
1241}
1242
1243