Home | History | Annotate | Line # | Download | only in savage
savage_state.c revision 1.1
      1 /* savage_state.c -- State and drawing support for Savage
      2  *
      3  * Copyright 2004  Felix Kuehling
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sub license,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the
     14  * next paragraph) shall be included in all copies or substantial portions
     15  * of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     20  * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
     21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
     22  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  */
     25 #include <drm/drmP.h>
     26 #include <drm/savage_drm.h>
     27 #include "savage_drv.h"
     28 
     29 void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
     30 			       const struct drm_clip_rect * pbox)
     31 {
     32 	uint32_t scstart = dev_priv->state.s3d.new_scstart;
     33 	uint32_t scend = dev_priv->state.s3d.new_scend;
     34 	scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
     35 	    ((uint32_t) pbox->x1 & 0x000007ff) |
     36 	    (((uint32_t) pbox->y1 << 16) & 0x07ff0000);
     37 	scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
     38 	    (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
     39 	    ((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);
     40 	if (scstart != dev_priv->state.s3d.scstart ||
     41 	    scend != dev_priv->state.s3d.scend) {
     42 		DMA_LOCALS;
     43 		BEGIN_DMA(4);
     44 		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
     45 		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
     46 		DMA_WRITE(scstart);
     47 		DMA_WRITE(scend);
     48 		dev_priv->state.s3d.scstart = scstart;
     49 		dev_priv->state.s3d.scend = scend;
     50 		dev_priv->waiting = 1;
     51 		DMA_COMMIT();
     52 	}
     53 }
     54 
     55 void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
     56 			      const struct drm_clip_rect * pbox)
     57 {
     58 	uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
     59 	uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
     60 	drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
     61 	    ((uint32_t) pbox->x1 & 0x000007ff) |
     62 	    (((uint32_t) pbox->y1 << 12) & 0x00fff000);
     63 	drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
     64 	    (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
     65 	    ((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);
     66 	if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
     67 	    drawctrl1 != dev_priv->state.s4.drawctrl1) {
     68 		DMA_LOCALS;
     69 		BEGIN_DMA(4);
     70 		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
     71 		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
     72 		DMA_WRITE(drawctrl0);
     73 		DMA_WRITE(drawctrl1);
     74 		dev_priv->state.s4.drawctrl0 = drawctrl0;
     75 		dev_priv->state.s4.drawctrl1 = drawctrl1;
     76 		dev_priv->waiting = 1;
     77 		DMA_COMMIT();
     78 	}
     79 }
     80 
     81 static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,
     82 				 uint32_t addr)
     83 {
     84 	if ((addr & 6) != 2) {	/* reserved bits */
     85 		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
     86 		return -EINVAL;
     87 	}
     88 	if (!(addr & 1)) {	/* local */
     89 		addr &= ~7;
     90 		if (addr < dev_priv->texture_offset ||
     91 		    addr >= dev_priv->texture_offset + dev_priv->texture_size) {
     92 			DRM_ERROR
     93 			    ("bad texAddr%d %08x (local addr out of range)\n",
     94 			     unit, addr);
     95 			return -EINVAL;
     96 		}
     97 	} else {		/* AGP */
     98 		if (!dev_priv->agp_textures) {
     99 			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
    100 				  unit, addr);
    101 			return -EINVAL;
    102 		}
    103 		addr &= ~7;
    104 		if (addr < dev_priv->agp_textures->offset ||
    105 		    addr >= (dev_priv->agp_textures->offset +
    106 			     dev_priv->agp_textures->size)) {
    107 			DRM_ERROR
    108 			    ("bad texAddr%d %08x (AGP addr out of range)\n",
    109 			     unit, addr);
    110 			return -EINVAL;
    111 		}
    112 	}
    113 	return 0;
    114 }
    115 
/*
 * Helpers for the savage_verify_state_*() functions.  They rely on the
 * locals "dev_priv", "start", "count" and "regs" of the enclosing function:
 * the register window [start, start + count) is described by regs[].
 *
 * SAVE_STATE(reg, where): if "reg" lies inside the window, copy its new
 * value into dev_priv->state.where.
 *
 * SAVE_STATE_MASK(reg, where, mask): same, but only the bits selected by
 * "mask" are taken from the new value; the other bits of
 * dev_priv->state.where are preserved.
 *
 * Both expand to a single do { } while (0) statement so that they can be
 * used safely as the body of an if/else.
 */
#define SAVE_STATE(reg, where) do {					\
	if (start <= (reg) && start + count > (reg))			\
		dev_priv->state.where = regs[(reg) - start];		\
} while (0)
#define SAVE_STATE_MASK(reg, where, mask) do {				\
	if (start <= (reg) && start + count > (reg)) {			\
		uint32_t new_bits = regs[(reg) - start];		\
		dev_priv->state.where = (new_bits & (mask)) |		\
			(dev_priv->state.where & ~(mask));		\
	}								\
} while (0)
    127 
    128 static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,
    129 				   unsigned int start, unsigned int count,
    130 				   const uint32_t *regs)
    131 {
    132 	if (start < SAVAGE_TEXPALADDR_S3D ||
    133 	    start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
    134 		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
    135 			  start, start + count - 1);
    136 		return -EINVAL;
    137 	}
    138 
    139 	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
    140 			~SAVAGE_SCISSOR_MASK_S3D);
    141 	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
    142 			~SAVAGE_SCISSOR_MASK_S3D);
    143 
    144 	/* if any texture regs were changed ... */
    145 	if (start <= SAVAGE_TEXCTRL_S3D &&
    146 	    start + count > SAVAGE_TEXPALADDR_S3D) {
    147 		/* ... check texture state */
    148 		SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
    149 		SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
    150 		if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
    151 			return savage_verify_texaddr(dev_priv, 0,
    152 						dev_priv->state.s3d.texaddr);
    153 	}
    154 
    155 	return 0;
    156 }
    157 
    158 static int savage_verify_state_s4(drm_savage_private_t * dev_priv,
    159 				  unsigned int start, unsigned int count,
    160 				  const uint32_t *regs)
    161 {
    162 	int ret = 0;
    163 
    164 	if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
    165 	    start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
    166 		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
    167 			  start, start + count - 1);
    168 		return -EINVAL;
    169 	}
    170 
    171 	SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
    172 			~SAVAGE_SCISSOR_MASK_S4);
    173 	SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
    174 			~SAVAGE_SCISSOR_MASK_S4);
    175 
    176 	/* if any texture regs were changed ... */
    177 	if (start <= SAVAGE_TEXDESCR_S4 &&
    178 	    start + count > SAVAGE_TEXPALADDR_S4) {
    179 		/* ... check texture state */
    180 		SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
    181 		SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
    182 		SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
    183 		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
    184 			ret |= savage_verify_texaddr(dev_priv, 0,
    185 						dev_priv->state.s4.texaddr0);
    186 		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
    187 			ret |= savage_verify_texaddr(dev_priv, 1,
    188 						dev_priv->state.s4.texaddr1);
    189 	}
    190 
    191 	return ret;
    192 }
    193 
    194 #undef SAVE_STATE
    195 #undef SAVE_STATE_MASK
    196 
    197 static int savage_dispatch_state(drm_savage_private_t * dev_priv,
    198 				 const drm_savage_cmd_header_t * cmd_header,
    199 				 const uint32_t *regs)
    200 {
    201 	unsigned int count = cmd_header->state.count;
    202 	unsigned int start = cmd_header->state.start;
    203 	unsigned int count2 = 0;
    204 	unsigned int bci_size;
    205 	int ret;
    206 	DMA_LOCALS;
    207 
    208 	if (!count)
    209 		return 0;
    210 
    211 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    212 		ret = savage_verify_state_s3d(dev_priv, start, count, regs);
    213 		if (ret != 0)
    214 			return ret;
    215 		/* scissor regs are emitted in savage_dispatch_draw */
    216 		if (start < SAVAGE_SCSTART_S3D) {
    217 			if (start + count > SAVAGE_SCEND_S3D + 1)
    218 				count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
    219 			if (start + count > SAVAGE_SCSTART_S3D)
    220 				count = SAVAGE_SCSTART_S3D - start;
    221 		} else if (start <= SAVAGE_SCEND_S3D) {
    222 			if (start + count > SAVAGE_SCEND_S3D + 1) {
    223 				count -= SAVAGE_SCEND_S3D + 1 - start;
    224 				start = SAVAGE_SCEND_S3D + 1;
    225 			} else
    226 				return 0;
    227 		}
    228 	} else {
    229 		ret = savage_verify_state_s4(dev_priv, start, count, regs);
    230 		if (ret != 0)
    231 			return ret;
    232 		/* scissor regs are emitted in savage_dispatch_draw */
    233 		if (start < SAVAGE_DRAWCTRL0_S4) {
    234 			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
    235 				count2 = count -
    236 					 (SAVAGE_DRAWCTRL1_S4 + 1 - start);
    237 			if (start + count > SAVAGE_DRAWCTRL0_S4)
    238 				count = SAVAGE_DRAWCTRL0_S4 - start;
    239 		} else if (start <= SAVAGE_DRAWCTRL1_S4) {
    240 			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
    241 				count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
    242 				start = SAVAGE_DRAWCTRL1_S4 + 1;
    243 			} else
    244 				return 0;
    245 		}
    246 	}
    247 
    248 	bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;
    249 
    250 	if (cmd_header->state.global) {
    251 		BEGIN_DMA(bci_size + 1);
    252 		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
    253 		dev_priv->waiting = 1;
    254 	} else {
    255 		BEGIN_DMA(bci_size);
    256 	}
    257 
    258 	do {
    259 		while (count > 0) {
    260 			unsigned int n = count < 255 ? count : 255;
    261 			DMA_SET_REGISTERS(start, n);
    262 			DMA_COPY(regs, n);
    263 			count -= n;
    264 			start += n;
    265 			regs += n;
    266 		}
    267 		start += 2;
    268 		regs += 2;
    269 		count = count2;
    270 		count2 = 0;
    271 	} while (count);
    272 
    273 	DMA_COMMIT();
    274 
    275 	return 0;
    276 }
    277 
    278 static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,
    279 				    const drm_savage_cmd_header_t * cmd_header,
    280 				    const struct drm_buf * dmabuf)
    281 {
    282 	unsigned char reorder = 0;
    283 	unsigned int prim = cmd_header->prim.prim;
    284 	unsigned int skip = cmd_header->prim.skip;
    285 	unsigned int n = cmd_header->prim.count;
    286 	unsigned int start = cmd_header->prim.start;
    287 	unsigned int i;
    288 	BCI_LOCALS;
    289 
    290 	if (!dmabuf) {
    291 		DRM_ERROR("called without dma buffers!\n");
    292 		return -EINVAL;
    293 	}
    294 
    295 	if (!n)
    296 		return 0;
    297 
    298 	switch (prim) {
    299 	case SAVAGE_PRIM_TRILIST_201:
    300 		reorder = 1;
    301 		prim = SAVAGE_PRIM_TRILIST;
    302 	case SAVAGE_PRIM_TRILIST:
    303 		if (n % 3 != 0) {
    304 			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
    305 				  n);
    306 			return -EINVAL;
    307 		}
    308 		break;
    309 	case SAVAGE_PRIM_TRISTRIP:
    310 	case SAVAGE_PRIM_TRIFAN:
    311 		if (n < 3) {
    312 			DRM_ERROR
    313 			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
    314 			     n);
    315 			return -EINVAL;
    316 		}
    317 		break;
    318 	default:
    319 		DRM_ERROR("invalid primitive type %u\n", prim);
    320 		return -EINVAL;
    321 	}
    322 
    323 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    324 		if (skip != 0) {
    325 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    326 			return -EINVAL;
    327 		}
    328 	} else {
    329 		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
    330 		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
    331 		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
    332 		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
    333 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    334 			return -EINVAL;
    335 		}
    336 		if (reorder) {
    337 			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
    338 			return -EINVAL;
    339 		}
    340 	}
    341 
    342 	if (start + n > dmabuf->total / 32) {
    343 		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
    344 			  start, start + n - 1, dmabuf->total / 32);
    345 		return -EINVAL;
    346 	}
    347 
    348 	/* Vertex DMA doesn't work with command DMA at the same time,
    349 	 * so we use BCI_... to submit commands here. Flush buffered
    350 	 * faked DMA first. */
    351 	DMA_FLUSH();
    352 
    353 	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
    354 		BEGIN_BCI(2);
    355 		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
    356 		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
    357 		dev_priv->state.common.vbaddr = dmabuf->bus_address;
    358 	}
    359 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
    360 		/* Workaround for what looks like a hardware bug. If a
    361 		 * WAIT_3D_IDLE was emitted some time before the
    362 		 * indexed drawing command then the engine will lock
    363 		 * up. There are two known workarounds:
    364 		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
    365 		BEGIN_BCI(63);
    366 		for (i = 0; i < 63; ++i)
    367 			BCI_WRITE(BCI_CMD_WAIT);
    368 		dev_priv->waiting = 0;
    369 	}
    370 
    371 	prim <<= 25;
    372 	while (n != 0) {
    373 		/* Can emit up to 255 indices (85 triangles) at once. */
    374 		unsigned int count = n > 255 ? 255 : n;
    375 		if (reorder) {
    376 			/* Need to reorder indices for correct flat
    377 			 * shading while preserving the clock sense
    378 			 * for correct culling. Only on Savage3D. */
    379 			int reorder[3] = { -1, -1, -1 };
    380 			reorder[start % 3] = 2;
    381 
    382 			BEGIN_BCI((count + 1 + 1) / 2);
    383 			BCI_DRAW_INDICES_S3D(count, prim, start + 2);
    384 
    385 			for (i = start + 1; i + 1 < start + count; i += 2)
    386 				BCI_WRITE((i + reorder[i % 3]) |
    387 					  ((i + 1 +
    388 					    reorder[(i + 1) % 3]) << 16));
    389 			if (i < start + count)
    390 				BCI_WRITE(i + reorder[i % 3]);
    391 		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    392 			BEGIN_BCI((count + 1 + 1) / 2);
    393 			BCI_DRAW_INDICES_S3D(count, prim, start);
    394 
    395 			for (i = start + 1; i + 1 < start + count; i += 2)
    396 				BCI_WRITE(i | ((i + 1) << 16));
    397 			if (i < start + count)
    398 				BCI_WRITE(i);
    399 		} else {
    400 			BEGIN_BCI((count + 2 + 1) / 2);
    401 			BCI_DRAW_INDICES_S4(count, prim, skip);
    402 
    403 			for (i = start; i + 1 < start + count; i += 2)
    404 				BCI_WRITE(i | ((i + 1) << 16));
    405 			if (i < start + count)
    406 				BCI_WRITE(i);
    407 		}
    408 
    409 		start += count;
    410 		n -= count;
    411 
    412 		prim |= BCI_CMD_DRAW_CONT;
    413 	}
    414 
    415 	return 0;
    416 }
    417 
    418 static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,
    419 				   const drm_savage_cmd_header_t * cmd_header,
    420 				   const uint32_t *vtxbuf, unsigned int vb_size,
    421 				   unsigned int vb_stride)
    422 {
    423 	unsigned char reorder = 0;
    424 	unsigned int prim = cmd_header->prim.prim;
    425 	unsigned int skip = cmd_header->prim.skip;
    426 	unsigned int n = cmd_header->prim.count;
    427 	unsigned int start = cmd_header->prim.start;
    428 	unsigned int vtx_size;
    429 	unsigned int i;
    430 	DMA_LOCALS;
    431 
    432 	if (!n)
    433 		return 0;
    434 
    435 	switch (prim) {
    436 	case SAVAGE_PRIM_TRILIST_201:
    437 		reorder = 1;
    438 		prim = SAVAGE_PRIM_TRILIST;
    439 	case SAVAGE_PRIM_TRILIST:
    440 		if (n % 3 != 0) {
    441 			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
    442 				  n);
    443 			return -EINVAL;
    444 		}
    445 		break;
    446 	case SAVAGE_PRIM_TRISTRIP:
    447 	case SAVAGE_PRIM_TRIFAN:
    448 		if (n < 3) {
    449 			DRM_ERROR
    450 			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
    451 			     n);
    452 			return -EINVAL;
    453 		}
    454 		break;
    455 	default:
    456 		DRM_ERROR("invalid primitive type %u\n", prim);
    457 		return -EINVAL;
    458 	}
    459 
    460 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    461 		if (skip > SAVAGE_SKIP_ALL_S3D) {
    462 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    463 			return -EINVAL;
    464 		}
    465 		vtx_size = 8;	/* full vertex */
    466 	} else {
    467 		if (skip > SAVAGE_SKIP_ALL_S4) {
    468 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    469 			return -EINVAL;
    470 		}
    471 		vtx_size = 10;	/* full vertex */
    472 	}
    473 
    474 	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
    475 	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
    476 	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
    477 
    478 	if (vtx_size > vb_stride) {
    479 		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
    480 			  vtx_size, vb_stride);
    481 		return -EINVAL;
    482 	}
    483 
    484 	if (start + n > vb_size / (vb_stride * 4)) {
    485 		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
    486 			  start, start + n - 1, vb_size / (vb_stride * 4));
    487 		return -EINVAL;
    488 	}
    489 
    490 	prim <<= 25;
    491 	while (n != 0) {
    492 		/* Can emit up to 255 vertices (85 triangles) at once. */
    493 		unsigned int count = n > 255 ? 255 : n;
    494 		if (reorder) {
    495 			/* Need to reorder vertices for correct flat
    496 			 * shading while preserving the clock sense
    497 			 * for correct culling. Only on Savage3D. */
    498 			int reorder[3] = { -1, -1, -1 };
    499 			reorder[start % 3] = 2;
    500 
    501 			BEGIN_DMA(count * vtx_size + 1);
    502 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    503 
    504 			for (i = start; i < start + count; ++i) {
    505 				unsigned int j = i + reorder[i % 3];
    506 				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
    507 			}
    508 
    509 			DMA_COMMIT();
    510 		} else {
    511 			BEGIN_DMA(count * vtx_size + 1);
    512 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    513 
    514 			if (vb_stride == vtx_size) {
    515 				DMA_COPY(&vtxbuf[vb_stride * start],
    516 					 vtx_size * count);
    517 			} else {
    518 				for (i = start; i < start + count; ++i) {
    519 					DMA_COPY(&vtxbuf [vb_stride * i],
    520 						 vtx_size);
    521 				}
    522 			}
    523 
    524 			DMA_COMMIT();
    525 		}
    526 
    527 		start += count;
    528 		n -= count;
    529 
    530 		prim |= BCI_CMD_DRAW_CONT;
    531 	}
    532 
    533 	return 0;
    534 }
    535 
    536 static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,
    537 				   const drm_savage_cmd_header_t * cmd_header,
    538 				   const uint16_t *idx,
    539 				   const struct drm_buf * dmabuf)
    540 {
    541 	unsigned char reorder = 0;
    542 	unsigned int prim = cmd_header->idx.prim;
    543 	unsigned int skip = cmd_header->idx.skip;
    544 	unsigned int n = cmd_header->idx.count;
    545 	unsigned int i;
    546 	BCI_LOCALS;
    547 
    548 	if (!dmabuf) {
    549 		DRM_ERROR("called without dma buffers!\n");
    550 		return -EINVAL;
    551 	}
    552 
    553 	if (!n)
    554 		return 0;
    555 
    556 	switch (prim) {
    557 	case SAVAGE_PRIM_TRILIST_201:
    558 		reorder = 1;
    559 		prim = SAVAGE_PRIM_TRILIST;
    560 	case SAVAGE_PRIM_TRILIST:
    561 		if (n % 3 != 0) {
    562 			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
    563 			return -EINVAL;
    564 		}
    565 		break;
    566 	case SAVAGE_PRIM_TRISTRIP:
    567 	case SAVAGE_PRIM_TRIFAN:
    568 		if (n < 3) {
    569 			DRM_ERROR
    570 			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
    571 			return -EINVAL;
    572 		}
    573 		break;
    574 	default:
    575 		DRM_ERROR("invalid primitive type %u\n", prim);
    576 		return -EINVAL;
    577 	}
    578 
    579 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    580 		if (skip != 0) {
    581 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    582 			return -EINVAL;
    583 		}
    584 	} else {
    585 		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
    586 		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
    587 		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
    588 		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
    589 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    590 			return -EINVAL;
    591 		}
    592 		if (reorder) {
    593 			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
    594 			return -EINVAL;
    595 		}
    596 	}
    597 
    598 	/* Vertex DMA doesn't work with command DMA at the same time,
    599 	 * so we use BCI_... to submit commands here. Flush buffered
    600 	 * faked DMA first. */
    601 	DMA_FLUSH();
    602 
    603 	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
    604 		BEGIN_BCI(2);
    605 		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
    606 		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
    607 		dev_priv->state.common.vbaddr = dmabuf->bus_address;
    608 	}
    609 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
    610 		/* Workaround for what looks like a hardware bug. If a
    611 		 * WAIT_3D_IDLE was emitted some time before the
    612 		 * indexed drawing command then the engine will lock
    613 		 * up. There are two known workarounds:
    614 		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
    615 		BEGIN_BCI(63);
    616 		for (i = 0; i < 63; ++i)
    617 			BCI_WRITE(BCI_CMD_WAIT);
    618 		dev_priv->waiting = 0;
    619 	}
    620 
    621 	prim <<= 25;
    622 	while (n != 0) {
    623 		/* Can emit up to 255 indices (85 triangles) at once. */
    624 		unsigned int count = n > 255 ? 255 : n;
    625 
    626 		/* check indices */
    627 		for (i = 0; i < count; ++i) {
    628 			if (idx[i] > dmabuf->total / 32) {
    629 				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
    630 					  i, idx[i], dmabuf->total / 32);
    631 				return -EINVAL;
    632 			}
    633 		}
    634 
    635 		if (reorder) {
    636 			/* Need to reorder indices for correct flat
    637 			 * shading while preserving the clock sense
    638 			 * for correct culling. Only on Savage3D. */
    639 			int reorder[3] = { 2, -1, -1 };
    640 
    641 			BEGIN_BCI((count + 1 + 1) / 2);
    642 			BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
    643 
    644 			for (i = 1; i + 1 < count; i += 2)
    645 				BCI_WRITE(idx[i + reorder[i % 3]] |
    646 					  (idx[i + 1 +
    647 					   reorder[(i + 1) % 3]] << 16));
    648 			if (i < count)
    649 				BCI_WRITE(idx[i + reorder[i % 3]]);
    650 		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    651 			BEGIN_BCI((count + 1 + 1) / 2);
    652 			BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
    653 
    654 			for (i = 1; i + 1 < count; i += 2)
    655 				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
    656 			if (i < count)
    657 				BCI_WRITE(idx[i]);
    658 		} else {
    659 			BEGIN_BCI((count + 2 + 1) / 2);
    660 			BCI_DRAW_INDICES_S4(count, prim, skip);
    661 
    662 			for (i = 0; i + 1 < count; i += 2)
    663 				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
    664 			if (i < count)
    665 				BCI_WRITE(idx[i]);
    666 		}
    667 
    668 		idx += count;
    669 		n -= count;
    670 
    671 		prim |= BCI_CMD_DRAW_CONT;
    672 	}
    673 
    674 	return 0;
    675 }
    676 
    677 static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,
    678 				  const drm_savage_cmd_header_t * cmd_header,
    679 				  const uint16_t *idx,
    680 				  const uint32_t *vtxbuf,
    681 				  unsigned int vb_size, unsigned int vb_stride)
    682 {
    683 	unsigned char reorder = 0;
    684 	unsigned int prim = cmd_header->idx.prim;
    685 	unsigned int skip = cmd_header->idx.skip;
    686 	unsigned int n = cmd_header->idx.count;
    687 	unsigned int vtx_size;
    688 	unsigned int i;
    689 	DMA_LOCALS;
    690 
    691 	if (!n)
    692 		return 0;
    693 
    694 	switch (prim) {
    695 	case SAVAGE_PRIM_TRILIST_201:
    696 		reorder = 1;
    697 		prim = SAVAGE_PRIM_TRILIST;
    698 	case SAVAGE_PRIM_TRILIST:
    699 		if (n % 3 != 0) {
    700 			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
    701 			return -EINVAL;
    702 		}
    703 		break;
    704 	case SAVAGE_PRIM_TRISTRIP:
    705 	case SAVAGE_PRIM_TRIFAN:
    706 		if (n < 3) {
    707 			DRM_ERROR
    708 			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
    709 			return -EINVAL;
    710 		}
    711 		break;
    712 	default:
    713 		DRM_ERROR("invalid primitive type %u\n", prim);
    714 		return -EINVAL;
    715 	}
    716 
    717 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    718 		if (skip > SAVAGE_SKIP_ALL_S3D) {
    719 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    720 			return -EINVAL;
    721 		}
    722 		vtx_size = 8;	/* full vertex */
    723 	} else {
    724 		if (skip > SAVAGE_SKIP_ALL_S4) {
    725 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    726 			return -EINVAL;
    727 		}
    728 		vtx_size = 10;	/* full vertex */
    729 	}
    730 
    731 	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
    732 	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
    733 	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
    734 
    735 	if (vtx_size > vb_stride) {
    736 		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
    737 			  vtx_size, vb_stride);
    738 		return -EINVAL;
    739 	}
    740 
    741 	prim <<= 25;
    742 	while (n != 0) {
    743 		/* Can emit up to 255 vertices (85 triangles) at once. */
    744 		unsigned int count = n > 255 ? 255 : n;
    745 
    746 		/* Check indices */
    747 		for (i = 0; i < count; ++i) {
    748 			if (idx[i] > vb_size / (vb_stride * 4)) {
    749 				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
    750 					  i, idx[i], vb_size / (vb_stride * 4));
    751 				return -EINVAL;
    752 			}
    753 		}
    754 
    755 		if (reorder) {
    756 			/* Need to reorder vertices for correct flat
    757 			 * shading while preserving the clock sense
    758 			 * for correct culling. Only on Savage3D. */
    759 			int reorder[3] = { 2, -1, -1 };
    760 
    761 			BEGIN_DMA(count * vtx_size + 1);
    762 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    763 
    764 			for (i = 0; i < count; ++i) {
    765 				unsigned int j = idx[i + reorder[i % 3]];
    766 				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
    767 			}
    768 
    769 			DMA_COMMIT();
    770 		} else {
    771 			BEGIN_DMA(count * vtx_size + 1);
    772 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    773 
    774 			for (i = 0; i < count; ++i) {
    775 				unsigned int j = idx[i];
    776 				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
    777 			}
    778 
    779 			DMA_COMMIT();
    780 		}
    781 
    782 		idx += count;
    783 		n -= count;
    784 
    785 		prim |= BCI_CMD_DRAW_CONT;
    786 	}
    787 
    788 	return 0;
    789 }
    790 
    791 static int savage_dispatch_clear(drm_savage_private_t * dev_priv,
    792 				 const drm_savage_cmd_header_t * cmd_header,
    793 				 const drm_savage_cmd_header_t *data,
    794 				 unsigned int nbox,
    795 				 const struct drm_clip_rect *boxes)
    796 {
    797 	unsigned int flags = cmd_header->clear0.flags;
    798 	unsigned int clear_cmd;
    799 	unsigned int i, nbufs;
    800 	DMA_LOCALS;
    801 
    802 	if (nbox == 0)
    803 		return 0;
    804 
    805 	clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
    806 	    BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
    807 	BCI_CMD_SET_ROP(clear_cmd, 0xCC);
    808 
    809 	nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
    810 	    ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
    811 	if (nbufs == 0)
    812 		return 0;
    813 
    814 	if (data->clear1.mask != 0xffffffff) {
    815 		/* set mask */
    816 		BEGIN_DMA(2);
    817 		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
    818 		DMA_WRITE(data->clear1.mask);
    819 		DMA_COMMIT();
    820 	}
    821 	for (i = 0; i < nbox; ++i) {
    822 		unsigned int x, y, w, h;
    823 		unsigned int buf;
    824 		x = boxes[i].x1, y = boxes[i].y1;
    825 		w = boxes[i].x2 - boxes[i].x1;
    826 		h = boxes[i].y2 - boxes[i].y1;
    827 		BEGIN_DMA(nbufs * 6);
    828 		for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
    829 			if (!(flags & buf))
    830 				continue;
    831 			DMA_WRITE(clear_cmd);
    832 			switch (buf) {
    833 			case SAVAGE_FRONT:
    834 				DMA_WRITE(dev_priv->front_offset);
    835 				DMA_WRITE(dev_priv->front_bd);
    836 				break;
    837 			case SAVAGE_BACK:
    838 				DMA_WRITE(dev_priv->back_offset);
    839 				DMA_WRITE(dev_priv->back_bd);
    840 				break;
    841 			case SAVAGE_DEPTH:
    842 				DMA_WRITE(dev_priv->depth_offset);
    843 				DMA_WRITE(dev_priv->depth_bd);
    844 				break;
    845 			}
    846 			DMA_WRITE(data->clear1.value);
    847 			DMA_WRITE(BCI_X_Y(x, y));
    848 			DMA_WRITE(BCI_W_H(w, h));
    849 		}
    850 		DMA_COMMIT();
    851 	}
    852 	if (data->clear1.mask != 0xffffffff) {
    853 		/* reset mask */
    854 		BEGIN_DMA(2);
    855 		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
    856 		DMA_WRITE(0xffffffff);
    857 		DMA_COMMIT();
    858 	}
    859 
    860 	return 0;
    861 }
    862 
    863 static int savage_dispatch_swap(drm_savage_private_t * dev_priv,
    864 				unsigned int nbox, const struct drm_clip_rect *boxes)
    865 {
    866 	unsigned int swap_cmd;
    867 	unsigned int i;
    868 	DMA_LOCALS;
    869 
    870 	if (nbox == 0)
    871 		return 0;
    872 
    873 	swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
    874 	    BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
    875 	BCI_CMD_SET_ROP(swap_cmd, 0xCC);
    876 
    877 	for (i = 0; i < nbox; ++i) {
    878 		BEGIN_DMA(6);
    879 		DMA_WRITE(swap_cmd);
    880 		DMA_WRITE(dev_priv->back_offset);
    881 		DMA_WRITE(dev_priv->back_bd);
    882 		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
    883 		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
    884 		DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
    885 				  boxes[i].y2 - boxes[i].y1));
    886 		DMA_COMMIT();
    887 	}
    888 
    889 	return 0;
    890 }
    891 
    892 static int savage_dispatch_draw(drm_savage_private_t * dev_priv,
    893 				const drm_savage_cmd_header_t *start,
    894 				const drm_savage_cmd_header_t *end,
    895 				const struct drm_buf * dmabuf,
    896 				const unsigned int *vtxbuf,
    897 				unsigned int vb_size, unsigned int vb_stride,
    898 				unsigned int nbox,
    899 				const struct drm_clip_rect *boxes)
    900 {
    901 	unsigned int i, j;
    902 	int ret;
    903 
    904 	for (i = 0; i < nbox; ++i) {
    905 		const drm_savage_cmd_header_t *cmdbuf;
    906 		dev_priv->emit_clip_rect(dev_priv, &boxes[i]);
    907 
    908 		cmdbuf = start;
    909 		while (cmdbuf < end) {
    910 			drm_savage_cmd_header_t cmd_header;
    911 			cmd_header = *cmdbuf;
    912 			cmdbuf++;
    913 			switch (cmd_header.cmd.cmd) {
    914 			case SAVAGE_CMD_DMA_PRIM:
    915 				ret = savage_dispatch_dma_prim(
    916 					dev_priv, &cmd_header, dmabuf);
    917 				break;
    918 			case SAVAGE_CMD_VB_PRIM:
    919 				ret = savage_dispatch_vb_prim(
    920 					dev_priv, &cmd_header,
    921 					vtxbuf, vb_size, vb_stride);
    922 				break;
    923 			case SAVAGE_CMD_DMA_IDX:
    924 				j = (cmd_header.idx.count + 3) / 4;
    925 				/* j was check in savage_bci_cmdbuf */
    926 				ret = savage_dispatch_dma_idx(dev_priv,
    927 					&cmd_header, (const uint16_t *)cmdbuf,
    928 					dmabuf);
    929 				cmdbuf += j;
    930 				break;
    931 			case SAVAGE_CMD_VB_IDX:
    932 				j = (cmd_header.idx.count + 3) / 4;
    933 				/* j was check in savage_bci_cmdbuf */
    934 				ret = savage_dispatch_vb_idx(dev_priv,
    935 					&cmd_header, (const uint16_t *)cmdbuf,
    936 					(const uint32_t *)vtxbuf, vb_size,
    937 					vb_stride);
    938 				cmdbuf += j;
    939 				break;
    940 			default:
    941 				/* What's the best return code? EFAULT? */
    942 				DRM_ERROR("IMPLEMENTATION ERROR: "
    943 					  "non-drawing-command %d\n",
    944 					  cmd_header.cmd.cmd);
    945 				return -EINVAL;
    946 			}
    947 
    948 			if (ret != 0)
    949 				return ret;
    950 		}
    951 	}
    952 
    953 	return 0;
    954 }
    955 
    956 int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
    957 {
    958 	drm_savage_private_t *dev_priv = dev->dev_private;
    959 	struct drm_device_dma *dma = dev->dma;
    960 	struct drm_buf *dmabuf;
    961 	drm_savage_cmdbuf_t *cmdbuf = data;
    962 	drm_savage_cmd_header_t *kcmd_addr = NULL;
    963 	drm_savage_cmd_header_t *first_draw_cmd;
    964 	unsigned int *kvb_addr = NULL;
    965 	struct drm_clip_rect *kbox_addr = NULL;
    966 	unsigned int i, j;
    967 	int ret = 0;
    968 
    969 	DRM_DEBUG("\n");
    970 
    971 	LOCK_TEST_WITH_RETURN(dev, file_priv);
    972 
    973 	if (dma && dma->buflist) {
    974 		if (cmdbuf->dma_idx > dma->buf_count) {
    975 			DRM_ERROR
    976 			    ("vertex buffer index %u out of range (0-%u)\n",
    977 			     cmdbuf->dma_idx, dma->buf_count - 1);
    978 			return -EINVAL;
    979 		}
    980 		dmabuf = dma->buflist[cmdbuf->dma_idx];
    981 	} else {
    982 		dmabuf = NULL;
    983 	}
    984 
    985 	/* Copy the user buffers into kernel temporary areas.  This hasn't been
    986 	 * a performance loss compared to VERIFYAREA_READ/
    987 	 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
    988 	 * for locking on FreeBSD.
    989 	 */
    990 	if (cmdbuf->size) {
    991 		kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL);
    992 		if (kcmd_addr == NULL)
    993 			return -ENOMEM;
    994 
    995 		if (DRM_COPY_FROM_USER(kcmd_addr, cmdbuf->cmd_addr,
    996 				       cmdbuf->size * 8))
    997 		{
    998 			kfree(kcmd_addr);
    999 			return -EFAULT;
   1000 		}
   1001 		cmdbuf->cmd_addr = kcmd_addr;
   1002 	}
   1003 	if (cmdbuf->vb_size) {
   1004 		kvb_addr = kmalloc(cmdbuf->vb_size, GFP_KERNEL);
   1005 		if (kvb_addr == NULL) {
   1006 			ret = -ENOMEM;
   1007 			goto done;
   1008 		}
   1009 
   1010 		if (DRM_COPY_FROM_USER(kvb_addr, cmdbuf->vb_addr,
   1011 				       cmdbuf->vb_size)) {
   1012 			ret = -EFAULT;
   1013 			goto done;
   1014 		}
   1015 		cmdbuf->vb_addr = kvb_addr;
   1016 	}
   1017 	if (cmdbuf->nbox) {
   1018 		kbox_addr = kmalloc_array(cmdbuf->nbox, sizeof(struct drm_clip_rect),
   1019 					  GFP_KERNEL);
   1020 		if (kbox_addr == NULL) {
   1021 			ret = -ENOMEM;
   1022 			goto done;
   1023 		}
   1024 
   1025 		if (DRM_COPY_FROM_USER(kbox_addr, cmdbuf->box_addr,
   1026 				       cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
   1027 			ret = -EFAULT;
   1028 			goto done;
   1029 		}
   1030 	cmdbuf->box_addr = kbox_addr;
   1031 	}
   1032 
   1033 	/* Make sure writes to DMA buffers are finished before sending
   1034 	 * DMA commands to the graphics hardware. */
   1035 	DRM_MEMORYBARRIER();
   1036 
   1037 	/* Coming from user space. Don't know if the Xserver has
   1038 	 * emitted wait commands. Assuming the worst. */
   1039 	dev_priv->waiting = 1;
   1040 
   1041 	i = 0;
   1042 	first_draw_cmd = NULL;
   1043 	while (i < cmdbuf->size) {
   1044 		drm_savage_cmd_header_t cmd_header;
   1045 		cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
   1046 		cmdbuf->cmd_addr++;
   1047 		i++;
   1048 
   1049 		/* Group drawing commands with same state to minimize
   1050 		 * iterations over clip rects. */
   1051 		j = 0;
   1052 		switch (cmd_header.cmd.cmd) {
   1053 		case SAVAGE_CMD_DMA_IDX:
   1054 		case SAVAGE_CMD_VB_IDX:
   1055 			j = (cmd_header.idx.count + 3) / 4;
   1056 			if (i + j > cmdbuf->size) {
   1057 				DRM_ERROR("indexed drawing command extends "
   1058 					  "beyond end of command buffer\n");
   1059 				DMA_FLUSH();
   1060 				ret = -EINVAL;
   1061 				goto done;
   1062 			}
   1063 			/* fall through */
   1064 		case SAVAGE_CMD_DMA_PRIM:
   1065 		case SAVAGE_CMD_VB_PRIM:
   1066 			if (!first_draw_cmd)
   1067 				first_draw_cmd = cmdbuf->cmd_addr - 1;
   1068 			cmdbuf->cmd_addr += j;
   1069 			i += j;
   1070 			break;
   1071 		default:
   1072 			if (first_draw_cmd) {
   1073 				ret = savage_dispatch_draw(
   1074 				      dev_priv, first_draw_cmd,
   1075 				      cmdbuf->cmd_addr - 1,
   1076 				      dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
   1077 				      cmdbuf->vb_stride,
   1078 				      cmdbuf->nbox, cmdbuf->box_addr);
   1079 				if (ret != 0)
   1080 					goto done;
   1081 				first_draw_cmd = NULL;
   1082 			}
   1083 		}
   1084 		if (first_draw_cmd)
   1085 			continue;
   1086 
   1087 		switch (cmd_header.cmd.cmd) {
   1088 		case SAVAGE_CMD_STATE:
   1089 			j = (cmd_header.state.count + 1) / 2;
   1090 			if (i + j > cmdbuf->size) {
   1091 				DRM_ERROR("command SAVAGE_CMD_STATE extends "
   1092 					  "beyond end of command buffer\n");
   1093 				DMA_FLUSH();
   1094 				ret = -EINVAL;
   1095 				goto done;
   1096 			}
   1097 			ret = savage_dispatch_state(dev_priv, &cmd_header,
   1098 				(const uint32_t *)cmdbuf->cmd_addr);
   1099 			cmdbuf->cmd_addr += j;
   1100 			i += j;
   1101 			break;
   1102 		case SAVAGE_CMD_CLEAR:
   1103 			if (i + 1 > cmdbuf->size) {
   1104 				DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
   1105 					  "beyond end of command buffer\n");
   1106 				DMA_FLUSH();
   1107 				ret = -EINVAL;
   1108 				goto done;
   1109 			}
   1110 			ret = savage_dispatch_clear(dev_priv, &cmd_header,
   1111 						    cmdbuf->cmd_addr,
   1112 						    cmdbuf->nbox,
   1113 						    cmdbuf->box_addr);
   1114 			cmdbuf->cmd_addr++;
   1115 			i++;
   1116 			break;
   1117 		case SAVAGE_CMD_SWAP:
   1118 			ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
   1119 						   cmdbuf->box_addr);
   1120 			break;
   1121 		default:
   1122 			DRM_ERROR("invalid command 0x%x\n",
   1123 				  cmd_header.cmd.cmd);
   1124 			DMA_FLUSH();
   1125 			ret = -EINVAL;
   1126 			goto done;
   1127 		}
   1128 
   1129 		if (ret != 0) {
   1130 			DMA_FLUSH();
   1131 			goto done;
   1132 		}
   1133 	}
   1134 
   1135 	if (first_draw_cmd) {
   1136 		ret = savage_dispatch_draw (
   1137 			dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
   1138 			cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
   1139 			cmdbuf->nbox, cmdbuf->box_addr);
   1140 		if (ret != 0) {
   1141 			DMA_FLUSH();
   1142 			goto done;
   1143 		}
   1144 	}
   1145 
   1146 	DMA_FLUSH();
   1147 
   1148 	if (dmabuf && cmdbuf->discard) {
   1149 		drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
   1150 		uint16_t event;
   1151 		event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
   1152 		SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
   1153 		savage_freelist_put(dev, dmabuf);
   1154 	}
   1155 
   1156 done:
   1157 	/* If we didn't need to allocate them, these'll be NULL */
   1158 	kfree(kcmd_addr);
   1159 	kfree(kvb_addr);
   1160 	kfree(kbox_addr);
   1161 
   1162 	return ret;
   1163 }
   1164