Home | History | Annotate | Line # | Download | only in shared-core
      1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
      2  *
      3  * Copyright (C) The Weather Channel, Inc.  2002.
      4  * Copyright (C) 2004 Nicolai Haehnle.
      5  * All Rights Reserved.
      6  *
      7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
      8  * initial release of the Radeon 8500 driver under the XFree86 license.
      9  * This notice must be preserved.
     10  *
     11  * Permission is hereby granted, free of charge, to any person obtaining a
     12  * copy of this software and associated documentation files (the "Software"),
     13  * to deal in the Software without restriction, including without limitation
     14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     15  * and/or sell copies of the Software, and to permit persons to whom the
     16  * Software is furnished to do so, subject to the following conditions:
     17  *
     18  * The above copyright notice and this permission notice (including the next
     19  * paragraph) shall be included in all copies or substantial portions of the
     20  * Software.
     21  *
     22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     28  * DEALINGS IN THE SOFTWARE.
     29  *
     30  * Authors:
     31  *    Nicolai Haehnle <prefect_ (at) gmx.net>
     32  */
     33 
     34 #include "drmP.h"
     35 #include "drm.h"
     36 #include "radeon_drm.h"
     37 #include "radeon_drv.h"
     38 #include "r300_reg.h"
     39 
     40 #define R300_SIMULTANEOUS_CLIPRECTS		4
     41 
     42 /* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
     43  */
     44 static const int r300_cliprect_cntl[4] = {
     45 	0xAAAA,
     46 	0xEEEE,
     47 	0xFEFE,
     48 	0xFFFE
     49 };
     50 
/**
 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
 * buffer, starting with index n.
 *
 * Also forces the hardware scissor wide open and flushes the destination
 * cache afterwards, so a cliprect change can never leave stale fragments.
 *
 * \param dev_priv device private (ring state and chip family flags)
 * \param cmdbuf   kernel command buffer; cmdbuf->boxes points at
 *                 cmdbuf->nbox userspace cliprects
 * \param n        index of the first cliprect to emit
 * \return 0 on success, -EFAULT if copying a cliprect from userspace faults
 */
static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf, int n)
{
	struct drm_clip_rect box;
	int nr;
	int i;
	RING_LOCALS;

	/* Clamp to the number of cliprects the hardware holds at once;
	 * the caller iterates to cover the rest. */
	nr = cmdbuf->nbox - n;
	if (nr > R300_SIMULTANEOUS_CLIPRECTS)
		nr = R300_SIMULTANEOUS_CLIPRECTS;

	DRM_DEBUG("%i cliprects\n", nr);

	if (nr) {
		BEGIN_RING(6 + nr * 2);
		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));

		for (i = 0; i < nr; ++i) {
			/* NOTE(review): the _UNCHECKED copy assumes the
			 * boxes pointer was access-checked at the ioctl
			 * entry point -- confirm in the caller. */
			if (DRM_COPY_FROM_USER_UNCHECKED
			    (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
				DRM_ERROR("copy cliprect faulted\n");
				return -EFAULT;
			}

			box.x2--; /* Hardware expects inclusive bottom-right corner */
			box.y2--;

			/* RV515 and newer take the raw coordinates; older
			 * chips need the R300_CLIPRECT_OFFSET bias added
			 * before masking. */
			if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
				box.x1 = (box.x1) &
					R300_CLIPRECT_MASK;
				box.y1 = (box.y1) &
					R300_CLIPRECT_MASK;
				box.x2 = (box.x2) &
					R300_CLIPRECT_MASK;
				box.y2 = (box.y2) &
					R300_CLIPRECT_MASK;
			} else {
				box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
				box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
				box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
				box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
			}

			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
				 (box.y1 << R300_CLIPRECT_Y_SHIFT));
			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
				 (box.y2 << R300_CLIPRECT_Y_SHIFT));

		}

		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);

		/* TODO/SECURITY: Force scissors to a safe value, otherwise the
		 * client might be able to trample over memory.
		 * The impact should be very limited, but I'd rather be safe than
		 * sorry.
		 */
		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
		OUT_RING(0);
		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
		ADVANCE_RING();
	} else {
		/* Why we allow zero cliprect rendering:
		 * There are some commands in a command buffer that must be submitted
		 * even when there are no cliprects, e.g. DMA buffer discard
		 * or state setting (though state setting could be avoided by
		 * simulating a loss of context).
		 *
		 * Now since the cmdbuf interface is so chaotic right now (and is
		 * bound to remain that way for a bit until things settle down),
		 * it is basically impossible to filter out the commands that are
		 * necessary and those that aren't.
		 *
		 * So I choose the safe way and don't do any filtering at all;
		 * instead, I simply set up the engine so that all rendering
		 * can't produce any fragments.
		 */
		BEGIN_RING(2);
		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
		ADVANCE_RING();
	}

	/* flush cache and wait idle clean after cliprect change */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_DC_FLUSH);
	ADVANCE_RING();
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	ADVANCE_RING();
	/* set flush flag */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	return 0;
}
    156 
/* One flag byte per 32-bit register in the 64K register space; filled in
 * by r300_init_reg_flags() and consulted by the packet0 checkers below. */
static u8 r300_reg_flags[0x10000 >> 2];
    158 
    159 void r300_init_reg_flags(struct drm_device *dev)
    160 {
    161 	int i;
    162 	drm_radeon_private_t *dev_priv = dev->dev_private;
    163 
    164 	memset(r300_reg_flags, 0, 0x10000 >> 2);
    165 #define ADD_RANGE_MARK(reg, count,mark) \
    166 		for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
    167 			r300_reg_flags[i]|=(mark);
    168 
    169 #define MARK_SAFE		1
    170 #define MARK_CHECK_OFFSET	2
    171 
    172 #define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)
    173 
    174 	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
    175 	ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
    176 	ADD_RANGE(R300_VAP_CNTL, 1);
    177 	ADD_RANGE(R300_SE_VTE_CNTL, 2);
    178 	ADD_RANGE(0x2134, 2);
    179 	ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
    180 	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
    181 	ADD_RANGE(0x21DC, 1);
    182 	ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
    183 	ADD_RANGE(R300_VAP_CLIP_X_0, 4);
    184 	ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
    185 	ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
    186 	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
    187 	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
    188 	ADD_RANGE(R300_GB_ENABLE, 1);
    189 	ADD_RANGE(R300_GB_MSPOS0, 5);
    190 	ADD_RANGE(R300_TX_INVALTAGS, 1);
    191 	ADD_RANGE(R300_TX_ENABLE, 1);
    192 	ADD_RANGE(0x4200, 4);
    193 	ADD_RANGE(0x4214, 1);
    194 	ADD_RANGE(R300_RE_POINTSIZE, 1);
    195 	ADD_RANGE(0x4230, 3);
    196 	ADD_RANGE(R300_RE_LINE_CNT, 1);
    197 	ADD_RANGE(R300_RE_UNK4238, 1);
    198 	ADD_RANGE(0x4260, 3);
    199 	ADD_RANGE(R300_RE_SHADE, 4);
    200 	ADD_RANGE(R300_RE_POLYGON_MODE, 5);
    201 	ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
    202 	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
    203 	ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
    204 	ADD_RANGE(R300_RE_CULL_CNTL, 1);
    205 	ADD_RANGE(0x42C0, 2);
    206 	ADD_RANGE(R300_RS_CNTL_0, 2);
    207 
    208 	ADD_RANGE(R300_SC_HYPERZ, 2);
    209 	ADD_RANGE(0x43E8, 1);
    210 
    211 	ADD_RANGE(0x46A4, 5);
    212 
    213 	ADD_RANGE(R300_RE_FOG_STATE, 1);
    214 	ADD_RANGE(R300_FOG_COLOR_R, 3);
    215 	ADD_RANGE(R300_PP_ALPHA_TEST, 2);
    216 	ADD_RANGE(0x4BD8, 1);
    217 	ADD_RANGE(R300_PFS_PARAM_0_X, 64);
    218 	ADD_RANGE(0x4E00, 1);
    219 	ADD_RANGE(R300_RB3D_CBLEND, 2);
    220 	ADD_RANGE(R300_RB3D_COLORMASK, 1);
    221 	ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
    222 	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);	/* check offset */
    223 	ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
    224 	ADD_RANGE(0x4E50, 9);
    225 	ADD_RANGE(0x4E88, 1);
    226 	ADD_RANGE(0x4EA0, 2);
    227 	ADD_RANGE(R300_ZB_CNTL, 3);
    228 	ADD_RANGE(R300_ZB_FORMAT, 4);
    229 	ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);	/* check offset */
    230 	ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
    231 	ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
    232 	ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
    233 
    234 	ADD_RANGE(R300_TX_FILTER_0, 16);
    235 	ADD_RANGE(R300_TX_FILTER1_0, 16);
    236 	ADD_RANGE(R300_TX_SIZE_0, 16);
    237 	ADD_RANGE(R300_TX_FORMAT_0, 16);
    238 	ADD_RANGE(R300_TX_PITCH_0, 16);
    239 	/* Texture offset is dangerous and needs more checking */
    240 	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
    241 	ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
    242 	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
    243 
    244 	/* Sporadic registers used as primitives are emitted */
    245 	ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
    246 	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
    247 	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
    248 	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
    249 
    250 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
    251 		ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
    252 		ADD_RANGE(R500_US_CONFIG, 2);
    253 		ADD_RANGE(R500_US_CODE_ADDR, 3);
    254 		ADD_RANGE(R500_US_FC_CTRL, 1);
    255 		ADD_RANGE(R500_RS_IP_0, 16);
    256 		ADD_RANGE(R500_RS_INST_0, 16);
    257 		ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
    258 		ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
    259 		ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
    260 	} else {
    261 		ADD_RANGE(R300_PFS_CNTL_0, 3);
    262 		ADD_RANGE(R300_PFS_NODE_0, 4);
    263 		ADD_RANGE(R300_PFS_TEXI_0, 64);
    264 		ADD_RANGE(R300_PFS_INSTR0_0, 64);
    265 		ADD_RANGE(R300_PFS_INSTR1_0, 64);
    266 		ADD_RANGE(R300_PFS_INSTR2_0, 64);
    267 		ADD_RANGE(R300_PFS_INSTR3_0, 64);
    268 		ADD_RANGE(R300_RS_INTERP_0, 8);
    269 		ADD_RANGE(R300_RS_ROUTE_0, 8);
    270 
    271 	}
    272 }
    273 
    274 static __inline__ int r300_check_range(unsigned reg, int count)
    275 {
    276 	int i;
    277 	if (reg & ~0xffff)
    278 		return -1;
    279 	for (i = (reg >> 2); i < (reg >> 2) + count; i++)
    280 		if (r300_reg_flags[i] != MARK_SAFE)
    281 			return 1;
    282 	return 0;
    283 }
    284 
/*
 * Slow path for packet0 emission: every register in the range is checked
 * individually against r300_reg_flags, and values written to
 * MARK_CHECK_OFFSET registers must be valid GPU offsets.
 *
 * The caller (r300_emit_packet0) has already verified that the register
 * range lies within the 64K register space and that cmdbuf holds at
 * least sz dwords.
 *
 * \return 0 on success, -EINVAL if sz exceeds the staging buffer, a
 *         value fails the offset check, or any register in the range is
 *         not whitelisted.
 */
static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
							  dev_priv,
							  drm_radeon_kcmd_buffer_t
							  * cmdbuf,
							  drm_r300_cmd_header_t
							  header)
{
	int reg;
	int sz;
	int i;
	int values[64];
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	/* values[] is a fixed 64-entry staging buffer, so refuse anything
	 * larger (or a negative count). */
	if ((sz > 64) || (sz < 0)) {
		DRM_ERROR
		    ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
		     reg, sz);
		return -EINVAL;
	}
	for (i = 0; i < sz; i++) {
		values[i] = ((int *)cmdbuf->buf)[i];
		switch (r300_reg_flags[(reg >> 2) + i]) {
		case MARK_SAFE:
			break;
		case MARK_CHECK_OFFSET:
			/* This register takes a GPU address: constrain it
			 * to the ranges radeon_check_offset() allows. */
			if (!radeon_check_offset(dev_priv, (u32) values[i])) {
				DRM_ERROR
				    ("Offset failed range check (reg=%04x sz=%d)\n",
				     reg, sz);
				return -EINVAL;
			}
			break;
		default:
			/* Flag 0 (or unknown): register not whitelisted. */
			DRM_ERROR("Register %04x failed check as flag=%02x\n",
				  reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
			return -EINVAL;
		}
	}

	BEGIN_RING(1 + sz);
	OUT_RING(CP_PACKET0(reg, sz - 1));
	OUT_RING_TABLE(values, sz);
	ADVANCE_RING();

	/* Consume the emitted dwords from the command buffer. */
	cmdbuf->buf += sz * 4;
	cmdbuf->bufsz -= sz * 4;

	return 0;
}
    337 
/**
 * Emits a packet0 setting arbitrary registers.
 * Called by r300_do_cp_cmdbuf.
 *
 * Note that checks are performed on contents and addresses of the registers:
 * ranges containing only MARK_SAFE registers are emitted directly; anything
 * else goes through r300_emit_carefully_checked_packet0().
 *
 * \return 0 on success, -EINVAL if the buffer is too short, the register
 *         range leaves the 64K register space, or per-register validation
 *         fails.
 */
static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					drm_r300_cmd_header_t header)
{
	int reg;
	int sz;
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	DRM_DEBUG("R300_CMD_PACKET0: reg %04x, sz %d\n", reg, sz);
	if (!sz)
		return 0;

	/* Make sure the claimed payload really is in the buffer. */
	if (sz * 4 > cmdbuf->bufsz)
		return -EINVAL;

	/* Reject ranges that would run past the 64K register space. */
	if (reg + sz * 4 >= 0x10000) {
		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
			  sz);
		return -EINVAL;
	}

	if (r300_check_range(reg, sz)) {
		/* go and check everything */
		return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
							   header);
	}
	/* the rest of the data is safe to emit, whatever the values the user passed */

	BEGIN_RING(1 + sz);
	OUT_RING(CP_PACKET0(reg, sz - 1));
	OUT_RING_TABLE((int *)cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * 4;
	cmdbuf->bufsz -= sz * 4;

	return 0;
}
    385 
/**
 * Uploads user-supplied vertex program instructions or parameters onto
 * the graphics card.
 * Called by r300_do_cp_cmdbuf.
 *
 * header.vpu.count is a number of 4-dword vectors: sz * 16 bytes are
 * consumed from cmdbuf and sz * 4 dwords are uploaded starting at the
 * PVS address encoded in the header.
 *
 * \return 0 on success, -EINVAL if the command buffer is too short.
 */
static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
				    drm_radeon_kcmd_buffer_t *cmdbuf,
				    drm_r300_cmd_header_t header)
{
	int sz;
	int addr;
	RING_LOCALS;

	sz = header.vpu.count;
	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;

	if (!sz)
		return 0;
	if (sz * 16 > cmdbuf->bufsz)
		return -EINVAL;

	/* VAP is very sensitive so we purge cache before we program it
	 * and we also flush its state before & after */
	BEGIN_RING(6);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_DC_FLUSH);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
	OUT_RING(0);
	ADVANCE_RING();
	/* set flush flag */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	/* Actual upload: set the PVS address, then stream the data. */
	BEGIN_RING(3 + sz * 4);
	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
	OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
	ADVANCE_RING();

	/* Flush PVS state again after the upload. */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
	OUT_RING(0);
	ADVANCE_RING();

	cmdbuf->buf += sz * 16;
	cmdbuf->bufsz -= sz * 16;

	return 0;
}
    436 
/**
 * Emit a clear packet from userspace.
 * Called by r300_emit_packet3.
 *
 * The payload is a fixed 8 dwords of vertex data, drawn as a single
 * point primitive, followed by a destination cache flush and a 3D
 * idle-clean wait.
 *
 * \return 0 on success, -EINVAL if fewer than 8 dwords are available.
 */
static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
				      drm_radeon_kcmd_buffer_t *cmdbuf)
{
	RING_LOCALS;

	if (8 * 4 > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(10);
	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
		 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
	OUT_RING_TABLE((int *)cmdbuf->buf, 8);
	ADVANCE_RING();

	/* Flush the destination cache and wait for 3D idle-clean so the
	 * clear is complete before anything else touches the target. */
	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_DC_FLUSH);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	ADVANCE_RING();
	/* set flush flag */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	cmdbuf->buf += 8 * 4;
	cmdbuf->bufsz -= 8 * 4;

	return 0;
}
    470 
/*
 * Validate and emit a 3D_LOAD_VBPNTR packet (vertex array pointers).
 *
 * The payload starts with narrays, followed by per-array entries: an
 * attribute dword and one or two GPU offsets (arrays come in pairs).
 * Every offset must pass radeon_check_offset() before the packet is
 * forwarded to the ring.  The caller (r300_emit_raw_packet3) has already
 * verified that (count + 2) * 4 bytes are available in cmdbuf.
 *
 * \return 0 on success, -EINVAL on an oversized or malformed packet or
 *         an out-of-range offset.
 */
static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
					       drm_radeon_kcmd_buffer_t *cmdbuf,
					       u32 header)
{
	int count, i, k;
#define MAX_ARRAY_PACKET  64
	u32 payload[MAX_ARRAY_PACKET];
	u32 narrays;
	RING_LOCALS;

	count = (header >> 16) & 0x3fff;

	/* payload[] is a fixed-size staging copy; reject packets that
	 * would not fit. */
	if ((count + 1) > MAX_ARRAY_PACKET) {
		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
			  count);
		return -EINVAL;
	}
	memset(payload, 0, MAX_ARRAY_PACKET * 4);
	memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);

	/* carefully check packet contents */

	narrays = payload[0];
	k = 0;
	i = 1;
	/* NOTE(review): when count + 1 == MAX_ARRAY_PACKET, the i++ below
	 * can make payload[i] index one dword past the staging copy before
	 * the loop condition is rechecked -- worth tightening. */
	while ((k < narrays) && (i < (count + 1))) {
		i++;		/* skip attribute field */
		if (!radeon_check_offset(dev_priv, payload[i])) {
			DRM_ERROR
			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
			     k, i);
			return -EINVAL;
		}
		k++;
		i++;
		if (k == narrays)
			break;
		/* have one more to process, they come in pairs */
		if (!radeon_check_offset(dev_priv, payload[i])) {
			DRM_ERROR
			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
			     k, i);
			return -EINVAL;
		}
		k++;
		i++;
	}
	/* do the counts match what we expect ? */
	if ((k != narrays) || (i != (count + 1))) {
		DRM_ERROR
		    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
		     k, i, narrays, count + 1);
		return -EINVAL;
	}

	/* all clear, output packet */

	BEGIN_RING(count + 2);
	OUT_RING(header);
	OUT_RING_TABLE(payload, count + 1);
	ADVANCE_RING();

	cmdbuf->buf += (count + 2) * 4;
	cmdbuf->bufsz -= (count + 2) * 4;

	return 0;
}
    538 
    539 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
    540 					     drm_radeon_kcmd_buffer_t *cmdbuf)
    541 {
    542 	u32 *cmd = (u32 *) cmdbuf->buf;
    543 	int count, ret;
    544 	RING_LOCALS;
    545 
    546 	count=(cmd[0]>>16) & 0x3fff;
    547 
    548 	if (cmd[0] & 0x8000) {
    549 		u32 offset;
    550 
    551 		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
    552 			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
    553 			offset = cmd[2] << 10;
    554 			ret = !radeon_check_offset(dev_priv, offset);
    555 			if (ret) {
    556 				DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
    557 				return -EINVAL;
    558 			}
    559 		}
    560 
    561 		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
    562 		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
    563 			offset = cmd[3] << 10;
    564 			ret = !radeon_check_offset(dev_priv, offset);
    565 			if (ret) {
    566 				DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
    567 				return -EINVAL;
    568 			}
    569 
    570 		}
    571 	}
    572 
    573 	BEGIN_RING(count+2);
    574 	OUT_RING(cmd[0]);
    575 	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
    576 	ADVANCE_RING();
    577 
    578 	cmdbuf->buf += (count+2)*4;
    579 	cmdbuf->bufsz -= (count+2)*4;
    580 
    581 	return 0;
    582 }
    583 
/*
 * Validate and emit a 3D_DRAW_INDX_2 packet.
 *
 * cmd[1] bits 16..31 carry the index count; unless 32-bit indices are
 * selected the indices are packed two per dword, so the expected payload
 * is (count + 1) / 2 dwords.  A packet with an empty payload (count == 0)
 * draws from an index buffer and must be followed immediately by a raw
 * INDX_BUFFER packet, which is validated here as well (register address,
 * GPU offset, size).
 *
 * \return 0 on success, -EINVAL on a size mismatch or a
 *         malformed/missing INDX_BUFFER follow-up.
 */
static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *cmd;
	int count;
	int expected_count;
	RING_LOCALS;

	cmd = (u32 *) cmdbuf->buf;
	count = (cmd[0]>>16) & 0x3fff;
	expected_count = cmd[1] >> 16;
	if (!(cmd[1] & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
		expected_count = (expected_count+1)/2;

	if (count && count != expected_count) {
		DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
			count, expected_count);
		return -EINVAL;
	}

	BEGIN_RING(count+2);
	OUT_RING(cmd[0]);
	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
	ADVANCE_RING();

	cmdbuf->buf += (count+2)*4;
	cmdbuf->bufsz -= (count+2)*4;

	if (!count) {
		drm_r300_cmd_header_t header;

		/* Need one command header plus the 4-dword INDX_BUFFER
		 * packet. */
		if (cmdbuf->bufsz < 4*4 + sizeof(header)) {
			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
			return -EINVAL;
		}

		header.u = *(unsigned int *)cmdbuf->buf;

		cmdbuf->buf += sizeof(header);
		cmdbuf->bufsz -= sizeof(header);
		cmd = (u32 *) cmdbuf->buf;

		/* Only a raw INDX_BUFFER packet3 of length 2 is accepted
		 * here. */
		if (header.header.cmd_type != R300_CMD_PACKET3 ||
		    header.packet3.packet != R300_CMD_PACKET3_RAW ||
		    cmd[0] != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
			return -EINVAL;
		}

		/* First payload dword: bit 31 set, low 16 bits must be
		 * register 0x0810. */
		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
			return -EINVAL;
		}
		/* Index buffer address must be a valid GPU offset. */
		if (!radeon_check_offset(dev_priv, cmd[2])) {
			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
			return -EINVAL;
		}
		/* Size must agree with the draw packet's index count. */
		if (cmd[3] != expected_count) {
			DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
				cmd[3], expected_count);
			return -EINVAL;
		}

		BEGIN_RING(4);
		OUT_RING(cmd[0]);
		OUT_RING_TABLE((int *)(cmdbuf->buf + 4), 3);
		ADVANCE_RING();

		cmdbuf->buf += 4*4;
		cmdbuf->bufsz -= 4*4;
	}

	return 0;
}
    658 
/*
 * Dispatch a raw packet3 from the command stream.
 *
 * Reads the packet3 header, verifies the whole packet is present in
 * cmdbuf, then either hands it to a dedicated checker (3D_LOAD_VBPNTR,
 * BITBLT_MULTI, 3D_DRAW_INDX_2) or emits it directly when it belongs to
 * the small set of known-safe types.  Unknown packets and a bare
 * INDX_BUFFER are rejected.
 *
 * \return 0 on success, -EINVAL on truncated, unknown or disallowed
 *         packets.
 */
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 header;
	int count;
	RING_LOCALS;

	if (4 > cmdbuf->bufsz)
		return -EINVAL;

	/* Fixme !! This simply emits a packet without much checking.
	   We need to be smarter. */

	/* obtain first word - actual packet3 header */
	header = *(u32 *) cmdbuf->buf;

	/* Is it packet 3 ? */
	if ((header >> 30) != 0x3) {
		DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
		return -EINVAL;
	}

	count = (header >> 16) & 0x3fff;

	/* Check again now that we know how much data to expect */
	if ((count + 2) * 4 > cmdbuf->bufsz) {
		DRM_ERROR
		    ("Expected packet3 of length %d but have only %d bytes left\n",
		     (count + 2) * 4, cmdbuf->bufsz);
		return -EINVAL;
	}

	/* Is it a packet type we know about ? */
	switch (header & 0xff00) {
	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);

	case RADEON_CNTL_BITBLT_MULTI:
		return r300_emit_bitblt_multi(dev_priv, cmdbuf);

	case RADEON_CP_INDX_BUFFER:
		/* INDX_BUFFER is only legal as the follow-up emitted by
		 * r300_emit_draw_indx_2. */
		DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
		return -EINVAL;
	case RADEON_CP_3D_DRAW_IMMD_2:
		/* triggers drawing using in-packet vertex data */
	case RADEON_CP_3D_DRAW_VBUF_2:
		/* triggers drawing of vertex buffers setup elsewhere */
		/* whenever we send vertices we clear flush & purge */
		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
					   RADEON_PURGE_EMITED);
		break;
	case RADEON_CP_3D_DRAW_INDX_2:
		/* triggers drawing using indices to vertex buffer */
		/* whenever we send vertex we clear flush & purge */
		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
					   RADEON_PURGE_EMITED);
		return r300_emit_draw_indx_2(dev_priv, cmdbuf);
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
		/* these packets are safe */
		break;
	default:
		DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
		return -EINVAL;
	}

	BEGIN_RING(count + 2);
	OUT_RING(header);
	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
	ADVANCE_RING();

	cmdbuf->buf += (count + 2) * 4;
	cmdbuf->bufsz -= (count + 2) * 4;

	return 0;
}
    734 
/**
 * Emit a rendering packet3 from userspace.
 * Called by r300_do_cp_cmdbuf.
 *
 * Runs at least once even with zero cliprects (see r300_emit_cliprects
 * for the rationale); when more cliprects are active than the hardware
 * holds at once, the same packet data is re-parsed and re-emitted for
 * each cliprect group.
 *
 * \return 0 on success, a negative errno from the cliprect or packet
 *         emission helpers otherwise.
 */
static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					drm_r300_cmd_header_t header)
{
	int n;
	int ret;
	char *orig_buf = cmdbuf->buf;
	int orig_bufsz = cmdbuf->bufsz;

	/* This is a do-while-loop so that we run the interior at least once,
	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
	 */
	n = 0;
	do {
		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
			if (ret)
				return ret;

			/* Rewind so the next pass parses the same packet. */
			cmdbuf->buf = orig_buf;
			cmdbuf->bufsz = orig_bufsz;
		}

		switch (header.packet3.packet) {
		case R300_CMD_PACKET3_CLEAR:
			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
			ret = r300_emit_clear(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_clear failed\n");
				return ret;
			}
			break;

		case R300_CMD_PACKET3_RAW:
			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_raw_packet3 failed\n");
				return ret;
			}
			break;

		default:
			DRM_ERROR("bad packet3 type %i at %p\n",
				  header.packet3.packet,
				  cmdbuf->buf - sizeof(header));
			return -EINVAL;
		}

		n += R300_SIMULTANEOUS_CLIPRECTS;
	} while (n < cmdbuf->nbox);

	return 0;
}
    793 
    794 /* Some of the R300 chips seem to be extremely touchy about the two registers
    795  * that are configured in r300_pacify.
    796  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
    797  * sends a command buffer that contains only state setting commands and a
    798  * vertex program/parameter upload sequence, this will eventually lead to a
    799  * lockup, unless the sequence is bracketed by calls to r300_pacify.
    800  * So we should take great care to *always* call r300_pacify before
    801  * *anything* 3D related, and again afterwards. This is what the
    802  * call bracket in r300_do_cp_cmdbuf is for.
    803  */
    804 
/**
 * Emit the sequence to pacify R300.
 *
 * Flushes (and, unless a purge was already emitted since the last draw,
 * also frees/purges) the Z cache, the 3D destination cache and the 2D
 * destination cache, invalidates texture tags, and waits for the engines
 * to go idle-clean.
 */
static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
{
	uint32_t cache_z, cache_3d, cache_2d;
	RING_LOCALS;

	cache_z = R300_ZC_FLUSH;
	cache_2d = R300_RB2D_DC_FLUSH;
	cache_3d = R300_RB3D_DC_FLUSH;
	if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
		/* we can purge: primitives were drawn since the last purge */
		cache_z |= R300_ZC_FREE;
		cache_2d |= R300_RB2D_DC_FREE;
		cache_3d |= R300_RB3D_DC_FREE;
	}

	/* flush & purge zbuffer */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
	OUT_RING(cache_z);
	ADVANCE_RING();
	/* flush & purge 3d */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(cache_3d);
	ADVANCE_RING();
	/* flush & purge texture */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
	OUT_RING(0);
	ADVANCE_RING();
	/* FIXME: is this one really needed ? */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
	OUT_RING(0);
	ADVANCE_RING();
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	ADVANCE_RING();
	/* flush & purge 2d through E2 as RB2D will trigger lockup */
	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
	OUT_RING(cache_2d);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
		 RADEON_WAIT_HOST_IDLECLEAN);
	ADVANCE_RING();
	/* set flush & purge flags */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
}
    858 
    859 /**
    860  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
    861  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
    862  * be careful about how this function is called.
    863  */
    864 static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
    865 {
    866 	drm_radeon_private_t *dev_priv = dev->dev_private;
    867 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
    868 
    869 	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
    870 	buf->pending = 1;
    871 	buf->used = 0;
    872 }
    873 
    874 static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
    875 			  drm_r300_cmd_header_t header)
    876 {
    877 	u32 wait_until;
    878 	RING_LOCALS;
    879 
    880 	if (!header.wait.flags)
    881 		return;
    882 
    883 	wait_until = 0;
    884 
    885 	switch(header.wait.flags) {
    886 	case R300_WAIT_2D:
    887 		wait_until = RADEON_WAIT_2D_IDLE;
    888 		break;
    889 	case R300_WAIT_3D:
    890 		wait_until = RADEON_WAIT_3D_IDLE;
    891 		break;
    892 	case R300_NEW_WAIT_2D_3D:
    893 		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE;
    894 		break;
    895 	case R300_NEW_WAIT_2D_2D_CLEAN:
    896 		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
    897 		break;
    898 	case R300_NEW_WAIT_3D_3D_CLEAN:
    899 		wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
    900 		break;
    901 	case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
    902 		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
    903 		wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
    904 		break;
    905 	default:
    906 		return;
    907 	}
    908 
    909 	BEGIN_RING(2);
    910 	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
    911 	OUT_RING(wait_until);
    912 	ADVANCE_RING();
    913 }
    914 
/**
 * Handle an R300_CMD_SCRATCH command: maintain the age/pending handshake
 * that lets user space discover when its buffers have been processed.
 *
 * The command payload is a 64-bit user-space pointer (base of an array of
 * {age, pending} u32 pairs) followed by n_bufs u32 indices into that array.
 * For each index we publish the new age and decrement the pending count,
 * then emit a CP packet that writes the new age into the chosen scratch
 * register (with scratch writeback enabled this presumably becomes visible
 * to user space — the copies above prime the expected values; TODO confirm
 * against the userspace driver).
 *
 * Returns 0 on success, -EINVAL on a malformed payload or failed user copy.
 */
static int r300_scratch(drm_radeon_private_t *dev_priv,
			drm_radeon_kcmd_buffer_t *cmdbuf,
			drm_r300_cmd_header_t header)
{
	u32 *ref_age_base;
	u32 i, buf_idx, h_pending;
	RING_LOCALS;

	/* Payload must hold the 64-bit base pointer plus one u32 per buffer. */
	if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) ) {
		return -EINVAL;
	}

	/* Only scratch registers 0..4 may be targeted. */
	if (header.scratch.reg >= 5) {
		return -EINVAL;
	}

	/* Advance the age that the CP packet at the end will publish. */
	dev_priv->scratch_ages[header.scratch.reg] ++;

	/* User-space base of the age array; only ever dereferenced through
	 * DRM_COPY_TO/FROM_USER below, which validate the address. */
	ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);

	cmdbuf->buf += sizeof(uint64_t);
	cmdbuf->bufsz -= sizeof(uint64_t);

	for (i=0; i < header.scratch.n_bufs; i++) {
		buf_idx = *(u32 *)cmdbuf->buf;
		buf_idx *= 2; /* 8 bytes per buf */

		/* Publish the new age for this buffer. */
		if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
			return -EINVAL;
		}

		/* Read, decrement and write back the pending count; a buffer
		 * with no pending references is a client error. */
		if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
			return -EINVAL;
		}

		if (h_pending == 0) {
			return -EINVAL;
		}

		h_pending--;

		if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
			return -EINVAL;
		}

		/* Consume this index from the command stream. */
		cmdbuf->buf += sizeof(buf_idx);
		cmdbuf->bufsz -= sizeof(buf_idx);
	}

	/* Have the CP write the new age to the scratch register once all
	 * previously emitted commands have executed. */
	BEGIN_RING(2);
	OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
	OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
	ADVANCE_RING();

	return 0;
}
    971 
    972 /**
    973  * Uploads user-supplied vertex program instructions or parameters onto
    974  * the graphics card.
    975  * Called by r300_do_cp_cmdbuf.
    976  */
    977 static __inline__ int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
    978 				       drm_radeon_kcmd_buffer_t *cmdbuf,
    979 				       drm_r300_cmd_header_t header)
    980 {
    981 	int sz;
    982 	int addr;
    983 	int type;
    984 	int clamp;
    985 	int stride;
    986 	RING_LOCALS;
    987 
    988 	sz = header.r500fp.count;
    989 	/* address is 9 bits 0 - 8, bit 1 of flags is part of address */
    990 	addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
    991 
    992 	type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
    993 	clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
    994 
    995 	addr |= (type << 16);
    996 	addr |= (clamp << 17);
    997 
    998 	stride = type ? 4 : 6;
    999 
   1000 	DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
   1001 	if (!sz)
   1002 		return 0;
   1003 	if (sz * stride * 4 > cmdbuf->bufsz)
   1004 		return -EINVAL;
   1005 
   1006 	BEGIN_RING(3 + sz * stride);
   1007 	OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
   1008 	OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
   1009 	OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride);
   1010 
   1011 	ADVANCE_RING();
   1012 
   1013 	cmdbuf->buf += sz * stride * 4;
   1014 	cmdbuf->bufsz -= sz * stride * 4;
   1015 
   1016 	return 0;
   1017 }
   1018 
   1019 
/**
 * Parses and validates a user-supplied command buffer and emits appropriate
 * commands on the DMA ring buffer.
 * Called by the ioctl handler function radeon_cp_cmdbuf.
 *
 * Returns 0 on success or a negative errno.  On both success and error the
 * pipeline is pacified and, if any buffer was discarded, its dispatch age
 * is emitted before returning.
 */
int r300_do_cp_cmdbuf(struct drm_device *dev,
		      struct drm_file *file_priv,
		      drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf = NULL;
	int emit_dispatch_age = 0;	/* set once a DMA buffer is discarded */
	int ret = 0;

	DRM_DEBUG("\n");

	/* pacify */
	r300_pacify(dev_priv);

	/* Emit cliprects up front when they all fit in one pass; larger
	 * sets are presumably handled elsewhere per-primitive — NOTE(review):
	 * confirm against r300_emit_cliprects/packet3 handling. */
	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
		if (ret)
			goto cleanup;
	}

	/* Consume one command header at a time; each handler advances
	 * cmdbuf->buf/bufsz past its own payload. */
	while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
		int idx;
		drm_r300_cmd_header_t header;

		header.u = *(unsigned int *)cmdbuf->buf;

		cmdbuf->buf += sizeof(header);
		cmdbuf->bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case R300_CMD_PACKET0:
			ret = r300_emit_packet0(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_packet0 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_VPU:
			DRM_DEBUG("R300_CMD_VPU\n");
			ret = r300_emit_vpu(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_vpu failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_PACKET3:
			DRM_DEBUG("R300_CMD_PACKET3\n");
			ret = r300_emit_packet3(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_packet3 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_END3D:
			DRM_DEBUG("R300_CMD_END3D\n");
			/* TODO:
			   Ideally userspace driver should not need to issue this call,
			   i.e. the drm driver should issue it automatically and prevent
			   lockups.

			   In practice, we do not understand why this call is needed and what
			   it does (except for some vague guesses that it has to do with cache
			   coherence) and so the user space driver does it.

			   Once we are sure which uses prevent lockups the code could be moved
			   into the kernel and the userspace driver will not
			   need to use this command.

			   Note that issuing this command does not hurt anything
			   except, possibly, performance */
			r300_pacify(dev_priv);
			break;

		case R300_CMD_CP_DELAY:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_CP_DELAY\n");
			{
				int i;
				RING_LOCALS;

				/* Pad the ring with count no-op packets. */
				BEGIN_RING(header.delay.count);
				for (i = 0; i < header.delay.count; i++)
					OUT_RING(RADEON_CP_PACKET2);
				ADVANCE_RING();
			}
			break;

		case R300_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			/* Validate the index before touching the buflist. */
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				ret = -EINVAL;
				goto cleanup;
			}

			/* The buffer must belong to this client and must not
			 * already be queued for the hardware. */
			buf = dma->buflist[idx];
			if (buf->file_priv != file_priv || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->file_priv, file_priv,
					  buf->pending);
				ret = -EINVAL;
				goto cleanup;
			}

			/* Age emission is deferred to the cleanup path below. */
			emit_dispatch_age = 1;
			r300_discard_buffer(dev, buf);
			break;

		case R300_CMD_WAIT:
			DRM_DEBUG("R300_CMD_WAIT\n");
			r300_cmd_wait(dev_priv, header);
			break;

		case R300_CMD_SCRATCH:
			DRM_DEBUG("R300_CMD_SCRATCH\n");
			ret = r300_scratch(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_scratch failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_R500FP:
			/* R500FP uploads only exist on RV515 and newer. */
			if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
				DRM_ERROR("Calling r500 command on r300 card\n");
				ret = -EINVAL;
				goto cleanup;
			}
			DRM_DEBUG("R300_CMD_R500FP\n");
			ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
			if (ret) {
				DRM_ERROR("r300_emit_r500fp failed\n");
				goto cleanup;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %i at %p\n",
				  header.header.cmd_type,
				  cmdbuf->buf - sizeof(header));
			ret = -EINVAL;
			goto cleanup;
		}
	}

	DRM_DEBUG("END\n");

	/* Shared exit path: reached on success and on every error above. */
      cleanup:
	r300_pacify(dev_priv);

	/* We emit the vertex buffer age here, outside the pacifier "brackets"
	 * for two reasons:
	 *  (1) This may coalesce multiple age emissions into a single one and
	 *  (2) more importantly, some chips lock up hard when scratch registers
	 *      are written inside the pacifier bracket.
	 */
	if (emit_dispatch_age) {
		RING_LOCALS;

		/* Emit the vertex buffer age */
		BEGIN_RING(2);
		RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
		ADVANCE_RING();
	}

	COMMIT_RING();

	return ret;
}
   1199