Home | History | Annotate | Line # | Download | only in savage
savage_state.c revision 1.1.1.4
      1 /*	$NetBSD: savage_state.c,v 1.1.1.4 2021/12/18 20:15:53 riastradh Exp $	*/
      2 
      3 /* savage_state.c -- State and drawing support for Savage
      4  *
      5  * Copyright 2004  Felix Kuehling
      6  * All Rights Reserved.
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the "Software"),
     10  * to deal in the Software without restriction, including without limitation
     11  * the rights to use, copy, modify, merge, publish, distribute, sub license,
     12  * and/or sell copies of the Software, and to permit persons to whom the
     13  * Software is furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial portions
     17  * of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     22  * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
     23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
     24  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __KERNEL_RCSID(0, "$NetBSD: savage_state.c,v 1.1.1.4 2021/12/18 20:15:53 riastradh Exp $");
     30 
     31 #include <linux/slab.h>
     32 #include <linux/uaccess.h>
     33 
     34 #include <drm/drm_device.h>
     35 #include <drm/drm_file.h>
     36 #include <drm/drm_print.h>
     37 #include <drm/savage_drm.h>
     38 
     39 #include "savage_drv.h"
     40 
     41 void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
     42 			       const struct drm_clip_rect * pbox)
     43 {
     44 	uint32_t scstart = dev_priv->state.s3d.new_scstart;
     45 	uint32_t scend = dev_priv->state.s3d.new_scend;
     46 	scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
     47 	    ((uint32_t) pbox->x1 & 0x000007ff) |
     48 	    (((uint32_t) pbox->y1 << 16) & 0x07ff0000);
     49 	scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
     50 	    (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
     51 	    ((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);
     52 	if (scstart != dev_priv->state.s3d.scstart ||
     53 	    scend != dev_priv->state.s3d.scend) {
     54 		DMA_LOCALS;
     55 		BEGIN_DMA(4);
     56 		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
     57 		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
     58 		DMA_WRITE(scstart);
     59 		DMA_WRITE(scend);
     60 		dev_priv->state.s3d.scstart = scstart;
     61 		dev_priv->state.s3d.scend = scend;
     62 		dev_priv->waiting = 1;
     63 		DMA_COMMIT();
     64 	}
     65 }
     66 
     67 void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
     68 			      const struct drm_clip_rect * pbox)
     69 {
     70 	uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
     71 	uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
     72 	drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
     73 	    ((uint32_t) pbox->x1 & 0x000007ff) |
     74 	    (((uint32_t) pbox->y1 << 12) & 0x00fff000);
     75 	drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
     76 	    (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
     77 	    ((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);
     78 	if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
     79 	    drawctrl1 != dev_priv->state.s4.drawctrl1) {
     80 		DMA_LOCALS;
     81 		BEGIN_DMA(4);
     82 		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
     83 		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
     84 		DMA_WRITE(drawctrl0);
     85 		DMA_WRITE(drawctrl1);
     86 		dev_priv->state.s4.drawctrl0 = drawctrl0;
     87 		dev_priv->state.s4.drawctrl1 = drawctrl1;
     88 		dev_priv->waiting = 1;
     89 		DMA_COMMIT();
     90 	}
     91 }
     92 
     93 static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,
     94 				 uint32_t addr)
     95 {
     96 	if ((addr & 6) != 2) {	/* reserved bits */
     97 		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
     98 		return -EINVAL;
     99 	}
    100 	if (!(addr & 1)) {	/* local */
    101 		addr &= ~7;
    102 		if (addr < dev_priv->texture_offset ||
    103 		    addr >= dev_priv->texture_offset + dev_priv->texture_size) {
    104 			DRM_ERROR
    105 			    ("bad texAddr%d %08x (local addr out of range)\n",
    106 			     unit, addr);
    107 			return -EINVAL;
    108 		}
    109 	} else {		/* AGP */
    110 		if (!dev_priv->agp_textures) {
    111 			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
    112 				  unit, addr);
    113 			return -EINVAL;
    114 		}
    115 		addr &= ~7;
    116 		if (addr < dev_priv->agp_textures->offset ||
    117 		    addr >= (dev_priv->agp_textures->offset +
    118 			     dev_priv->agp_textures->size)) {
    119 			DRM_ERROR
    120 			    ("bad texAddr%d %08x (AGP addr out of range)\n",
    121 			     unit, addr);
    122 			return -EINVAL;
    123 		}
    124 	}
    125 	return 0;
    126 }
    127 
/*
 * Helpers for the savage_verify_state_* functions.  They expect `start`,
 * `count`, `regs` and `dev_priv` to be in scope at the point of use.
 *
 * SAVE_STATE(reg, where): if register `reg` lies inside the range
 * [start, start + count), copy its new value from `regs` into
 * dev_priv->state.where.
 *
 * SAVE_STATE_MASK(reg, where, mask): same, but only the bits selected by
 * `mask` are taken from the new value; the other bits of
 * dev_priv->state.where are preserved.
 *
 * Both are wrapped in do { } while (0) so that each behaves as a single
 * statement (a bare `if` would capture a following `else`).
 */
#define SAVE_STATE(reg,where) do {				\
	if (start <= (reg) && start + count > (reg))		\
		dev_priv->state.where = regs[(reg) - start];	\
} while (0)
#define SAVE_STATE_MASK(reg,where,mask) do {			\
	if (start <= (reg) && start + count > (reg)) {		\
		uint32_t tmp;					\
		tmp = regs[(reg) - start];			\
		dev_priv->state.where = (tmp & (mask)) |	\
			(dev_priv->state.where & ~(mask));	\
	}							\
} while (0)
    139 
    140 static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,
    141 				   unsigned int start, unsigned int count,
    142 				   const uint32_t *regs)
    143 {
    144 	if (start < SAVAGE_TEXPALADDR_S3D ||
    145 	    start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
    146 		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
    147 			  start, start + count - 1);
    148 		return -EINVAL;
    149 	}
    150 
    151 	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
    152 			~SAVAGE_SCISSOR_MASK_S3D);
    153 	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
    154 			~SAVAGE_SCISSOR_MASK_S3D);
    155 
    156 	/* if any texture regs were changed ... */
    157 	if (start <= SAVAGE_TEXCTRL_S3D &&
    158 	    start + count > SAVAGE_TEXPALADDR_S3D) {
    159 		/* ... check texture state */
    160 		SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
    161 		SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
    162 		if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
    163 			return savage_verify_texaddr(dev_priv, 0,
    164 						dev_priv->state.s3d.texaddr);
    165 	}
    166 
    167 	return 0;
    168 }
    169 
    170 static int savage_verify_state_s4(drm_savage_private_t * dev_priv,
    171 				  unsigned int start, unsigned int count,
    172 				  const uint32_t *regs)
    173 {
    174 	int ret = 0;
    175 
    176 	if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
    177 	    start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
    178 		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
    179 			  start, start + count - 1);
    180 		return -EINVAL;
    181 	}
    182 
    183 	SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
    184 			~SAVAGE_SCISSOR_MASK_S4);
    185 	SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
    186 			~SAVAGE_SCISSOR_MASK_S4);
    187 
    188 	/* if any texture regs were changed ... */
    189 	if (start <= SAVAGE_TEXDESCR_S4 &&
    190 	    start + count > SAVAGE_TEXPALADDR_S4) {
    191 		/* ... check texture state */
    192 		SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
    193 		SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
    194 		SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
    195 		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
    196 			ret |= savage_verify_texaddr(dev_priv, 0,
    197 						dev_priv->state.s4.texaddr0);
    198 		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
    199 			ret |= savage_verify_texaddr(dev_priv, 1,
    200 						dev_priv->state.s4.texaddr1);
    201 	}
    202 
    203 	return ret;
    204 }
    205 
    206 #undef SAVE_STATE
    207 #undef SAVE_STATE_MASK
    208 
    209 static int savage_dispatch_state(drm_savage_private_t * dev_priv,
    210 				 const drm_savage_cmd_header_t * cmd_header,
    211 				 const uint32_t *regs)
    212 {
    213 	unsigned int count = cmd_header->state.count;
    214 	unsigned int start = cmd_header->state.start;
    215 	unsigned int count2 = 0;
    216 	unsigned int bci_size;
    217 	int ret;
    218 	DMA_LOCALS;
    219 
    220 	if (!count)
    221 		return 0;
    222 
    223 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    224 		ret = savage_verify_state_s3d(dev_priv, start, count, regs);
    225 		if (ret != 0)
    226 			return ret;
    227 		/* scissor regs are emitted in savage_dispatch_draw */
    228 		if (start < SAVAGE_SCSTART_S3D) {
    229 			if (start + count > SAVAGE_SCEND_S3D + 1)
    230 				count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
    231 			if (start + count > SAVAGE_SCSTART_S3D)
    232 				count = SAVAGE_SCSTART_S3D - start;
    233 		} else if (start <= SAVAGE_SCEND_S3D) {
    234 			if (start + count > SAVAGE_SCEND_S3D + 1) {
    235 				count -= SAVAGE_SCEND_S3D + 1 - start;
    236 				start = SAVAGE_SCEND_S3D + 1;
    237 			} else
    238 				return 0;
    239 		}
    240 	} else {
    241 		ret = savage_verify_state_s4(dev_priv, start, count, regs);
    242 		if (ret != 0)
    243 			return ret;
    244 		/* scissor regs are emitted in savage_dispatch_draw */
    245 		if (start < SAVAGE_DRAWCTRL0_S4) {
    246 			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
    247 				count2 = count -
    248 					 (SAVAGE_DRAWCTRL1_S4 + 1 - start);
    249 			if (start + count > SAVAGE_DRAWCTRL0_S4)
    250 				count = SAVAGE_DRAWCTRL0_S4 - start;
    251 		} else if (start <= SAVAGE_DRAWCTRL1_S4) {
    252 			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
    253 				count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
    254 				start = SAVAGE_DRAWCTRL1_S4 + 1;
    255 			} else
    256 				return 0;
    257 		}
    258 	}
    259 
    260 	bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;
    261 
    262 	if (cmd_header->state.global) {
    263 		BEGIN_DMA(bci_size + 1);
    264 		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
    265 		dev_priv->waiting = 1;
    266 	} else {
    267 		BEGIN_DMA(bci_size);
    268 	}
    269 
    270 	do {
    271 		while (count > 0) {
    272 			unsigned int n = count < 255 ? count : 255;
    273 			DMA_SET_REGISTERS(start, n);
    274 			DMA_COPY(regs, n);
    275 			count -= n;
    276 			start += n;
    277 			regs += n;
    278 		}
    279 		start += 2;
    280 		regs += 2;
    281 		count = count2;
    282 		count2 = 0;
    283 	} while (count);
    284 
    285 	DMA_COMMIT();
    286 
    287 	return 0;
    288 }
    289 
    290 static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,
    291 				    const drm_savage_cmd_header_t * cmd_header,
    292 				    const struct drm_buf * dmabuf)
    293 {
    294 	unsigned char reorder = 0;
    295 	unsigned int prim = cmd_header->prim.prim;
    296 	unsigned int skip = cmd_header->prim.skip;
    297 	unsigned int n = cmd_header->prim.count;
    298 	unsigned int start = cmd_header->prim.start;
    299 	unsigned int i;
    300 	BCI_LOCALS;
    301 
    302 	if (!dmabuf) {
    303 		DRM_ERROR("called without dma buffers!\n");
    304 		return -EINVAL;
    305 	}
    306 
    307 	if (!n)
    308 		return 0;
    309 
    310 	switch (prim) {
    311 	case SAVAGE_PRIM_TRILIST_201:
    312 		reorder = 1;
    313 		prim = SAVAGE_PRIM_TRILIST;
    314 		/* fall through */
    315 	case SAVAGE_PRIM_TRILIST:
    316 		if (n % 3 != 0) {
    317 			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
    318 				  n);
    319 			return -EINVAL;
    320 		}
    321 		break;
    322 	case SAVAGE_PRIM_TRISTRIP:
    323 	case SAVAGE_PRIM_TRIFAN:
    324 		if (n < 3) {
    325 			DRM_ERROR
    326 			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
    327 			     n);
    328 			return -EINVAL;
    329 		}
    330 		break;
    331 	default:
    332 		DRM_ERROR("invalid primitive type %u\n", prim);
    333 		return -EINVAL;
    334 	}
    335 
    336 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    337 		if (skip != 0) {
    338 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    339 			return -EINVAL;
    340 		}
    341 	} else {
    342 		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
    343 		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
    344 		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
    345 		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
    346 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    347 			return -EINVAL;
    348 		}
    349 		if (reorder) {
    350 			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
    351 			return -EINVAL;
    352 		}
    353 	}
    354 
    355 	if (start + n > dmabuf->total / 32) {
    356 		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
    357 			  start, start + n - 1, dmabuf->total / 32);
    358 		return -EINVAL;
    359 	}
    360 
    361 	/* Vertex DMA doesn't work with command DMA at the same time,
    362 	 * so we use BCI_... to submit commands here. Flush buffered
    363 	 * faked DMA first. */
    364 	DMA_FLUSH();
    365 
    366 	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
    367 		BEGIN_BCI(2);
    368 		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
    369 		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
    370 		dev_priv->state.common.vbaddr = dmabuf->bus_address;
    371 	}
    372 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
    373 		/* Workaround for what looks like a hardware bug. If a
    374 		 * WAIT_3D_IDLE was emitted some time before the
    375 		 * indexed drawing command then the engine will lock
    376 		 * up. There are two known workarounds:
    377 		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
    378 		BEGIN_BCI(63);
    379 		for (i = 0; i < 63; ++i)
    380 			BCI_WRITE(BCI_CMD_WAIT);
    381 		dev_priv->waiting = 0;
    382 	}
    383 
    384 	prim <<= 25;
    385 	while (n != 0) {
    386 		/* Can emit up to 255 indices (85 triangles) at once. */
    387 		unsigned int count = n > 255 ? 255 : n;
    388 		if (reorder) {
    389 			/* Need to reorder indices for correct flat
    390 			 * shading while preserving the clock sense
    391 			 * for correct culling. Only on Savage3D. */
    392 			int reorder[3] = { -1, -1, -1 };
    393 			reorder[start % 3] = 2;
    394 
    395 			BEGIN_BCI((count + 1 + 1) / 2);
    396 			BCI_DRAW_INDICES_S3D(count, prim, start + 2);
    397 
    398 			for (i = start + 1; i + 1 < start + count; i += 2)
    399 				BCI_WRITE((i + reorder[i % 3]) |
    400 					  ((i + 1 +
    401 					    reorder[(i + 1) % 3]) << 16));
    402 			if (i < start + count)
    403 				BCI_WRITE(i + reorder[i % 3]);
    404 		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    405 			BEGIN_BCI((count + 1 + 1) / 2);
    406 			BCI_DRAW_INDICES_S3D(count, prim, start);
    407 
    408 			for (i = start + 1; i + 1 < start + count; i += 2)
    409 				BCI_WRITE(i | ((i + 1) << 16));
    410 			if (i < start + count)
    411 				BCI_WRITE(i);
    412 		} else {
    413 			BEGIN_BCI((count + 2 + 1) / 2);
    414 			BCI_DRAW_INDICES_S4(count, prim, skip);
    415 
    416 			for (i = start; i + 1 < start + count; i += 2)
    417 				BCI_WRITE(i | ((i + 1) << 16));
    418 			if (i < start + count)
    419 				BCI_WRITE(i);
    420 		}
    421 
    422 		start += count;
    423 		n -= count;
    424 
    425 		prim |= BCI_CMD_DRAW_CONT;
    426 	}
    427 
    428 	return 0;
    429 }
    430 
    431 static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,
    432 				   const drm_savage_cmd_header_t * cmd_header,
    433 				   const uint32_t *vtxbuf, unsigned int vb_size,
    434 				   unsigned int vb_stride)
    435 {
    436 	unsigned char reorder = 0;
    437 	unsigned int prim = cmd_header->prim.prim;
    438 	unsigned int skip = cmd_header->prim.skip;
    439 	unsigned int n = cmd_header->prim.count;
    440 	unsigned int start = cmd_header->prim.start;
    441 	unsigned int vtx_size;
    442 	unsigned int i;
    443 	DMA_LOCALS;
    444 
    445 	if (!n)
    446 		return 0;
    447 
    448 	switch (prim) {
    449 	case SAVAGE_PRIM_TRILIST_201:
    450 		reorder = 1;
    451 		prim = SAVAGE_PRIM_TRILIST;
    452 		/* fall through */
    453 	case SAVAGE_PRIM_TRILIST:
    454 		if (n % 3 != 0) {
    455 			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
    456 				  n);
    457 			return -EINVAL;
    458 		}
    459 		break;
    460 	case SAVAGE_PRIM_TRISTRIP:
    461 	case SAVAGE_PRIM_TRIFAN:
    462 		if (n < 3) {
    463 			DRM_ERROR
    464 			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
    465 			     n);
    466 			return -EINVAL;
    467 		}
    468 		break;
    469 	default:
    470 		DRM_ERROR("invalid primitive type %u\n", prim);
    471 		return -EINVAL;
    472 	}
    473 
    474 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    475 		if (skip > SAVAGE_SKIP_ALL_S3D) {
    476 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    477 			return -EINVAL;
    478 		}
    479 		vtx_size = 8;	/* full vertex */
    480 	} else {
    481 		if (skip > SAVAGE_SKIP_ALL_S4) {
    482 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    483 			return -EINVAL;
    484 		}
    485 		vtx_size = 10;	/* full vertex */
    486 	}
    487 
    488 	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
    489 	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
    490 	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
    491 
    492 	if (vtx_size > vb_stride) {
    493 		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
    494 			  vtx_size, vb_stride);
    495 		return -EINVAL;
    496 	}
    497 
    498 	if (start + n > vb_size / (vb_stride * 4)) {
    499 		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
    500 			  start, start + n - 1, vb_size / (vb_stride * 4));
    501 		return -EINVAL;
    502 	}
    503 
    504 	prim <<= 25;
    505 	while (n != 0) {
    506 		/* Can emit up to 255 vertices (85 triangles) at once. */
    507 		unsigned int count = n > 255 ? 255 : n;
    508 		if (reorder) {
    509 			/* Need to reorder vertices for correct flat
    510 			 * shading while preserving the clock sense
    511 			 * for correct culling. Only on Savage3D. */
    512 			int reorder[3] = { -1, -1, -1 };
    513 			reorder[start % 3] = 2;
    514 
    515 			BEGIN_DMA(count * vtx_size + 1);
    516 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    517 
    518 			for (i = start; i < start + count; ++i) {
    519 				unsigned int j = i + reorder[i % 3];
    520 				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
    521 			}
    522 
    523 			DMA_COMMIT();
    524 		} else {
    525 			BEGIN_DMA(count * vtx_size + 1);
    526 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    527 
    528 			if (vb_stride == vtx_size) {
    529 				DMA_COPY(&vtxbuf[vb_stride * start],
    530 					 vtx_size * count);
    531 			} else {
    532 				for (i = start; i < start + count; ++i) {
    533 					DMA_COPY(&vtxbuf [vb_stride * i],
    534 						 vtx_size);
    535 				}
    536 			}
    537 
    538 			DMA_COMMIT();
    539 		}
    540 
    541 		start += count;
    542 		n -= count;
    543 
    544 		prim |= BCI_CMD_DRAW_CONT;
    545 	}
    546 
    547 	return 0;
    548 }
    549 
    550 static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,
    551 				   const drm_savage_cmd_header_t * cmd_header,
    552 				   const uint16_t *idx,
    553 				   const struct drm_buf * dmabuf)
    554 {
    555 	unsigned char reorder = 0;
    556 	unsigned int prim = cmd_header->idx.prim;
    557 	unsigned int skip = cmd_header->idx.skip;
    558 	unsigned int n = cmd_header->idx.count;
    559 	unsigned int i;
    560 	BCI_LOCALS;
    561 
    562 	if (!dmabuf) {
    563 		DRM_ERROR("called without dma buffers!\n");
    564 		return -EINVAL;
    565 	}
    566 
    567 	if (!n)
    568 		return 0;
    569 
    570 	switch (prim) {
    571 	case SAVAGE_PRIM_TRILIST_201:
    572 		reorder = 1;
    573 		prim = SAVAGE_PRIM_TRILIST;
    574 		/* fall through */
    575 	case SAVAGE_PRIM_TRILIST:
    576 		if (n % 3 != 0) {
    577 			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
    578 			return -EINVAL;
    579 		}
    580 		break;
    581 	case SAVAGE_PRIM_TRISTRIP:
    582 	case SAVAGE_PRIM_TRIFAN:
    583 		if (n < 3) {
    584 			DRM_ERROR
    585 			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
    586 			return -EINVAL;
    587 		}
    588 		break;
    589 	default:
    590 		DRM_ERROR("invalid primitive type %u\n", prim);
    591 		return -EINVAL;
    592 	}
    593 
    594 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    595 		if (skip != 0) {
    596 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    597 			return -EINVAL;
    598 		}
    599 	} else {
    600 		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
    601 		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
    602 		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
    603 		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
    604 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    605 			return -EINVAL;
    606 		}
    607 		if (reorder) {
    608 			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
    609 			return -EINVAL;
    610 		}
    611 	}
    612 
    613 	/* Vertex DMA doesn't work with command DMA at the same time,
    614 	 * so we use BCI_... to submit commands here. Flush buffered
    615 	 * faked DMA first. */
    616 	DMA_FLUSH();
    617 
    618 	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
    619 		BEGIN_BCI(2);
    620 		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
    621 		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
    622 		dev_priv->state.common.vbaddr = dmabuf->bus_address;
    623 	}
    624 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
    625 		/* Workaround for what looks like a hardware bug. If a
    626 		 * WAIT_3D_IDLE was emitted some time before the
    627 		 * indexed drawing command then the engine will lock
    628 		 * up. There are two known workarounds:
    629 		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
    630 		BEGIN_BCI(63);
    631 		for (i = 0; i < 63; ++i)
    632 			BCI_WRITE(BCI_CMD_WAIT);
    633 		dev_priv->waiting = 0;
    634 	}
    635 
    636 	prim <<= 25;
    637 	while (n != 0) {
    638 		/* Can emit up to 255 indices (85 triangles) at once. */
    639 		unsigned int count = n > 255 ? 255 : n;
    640 
    641 		/* check indices */
    642 		for (i = 0; i < count; ++i) {
    643 			if (idx[i] > dmabuf->total / 32) {
    644 				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
    645 					  i, idx[i], dmabuf->total / 32);
    646 				return -EINVAL;
    647 			}
    648 		}
    649 
    650 		if (reorder) {
    651 			/* Need to reorder indices for correct flat
    652 			 * shading while preserving the clock sense
    653 			 * for correct culling. Only on Savage3D. */
    654 			int reorder[3] = { 2, -1, -1 };
    655 
    656 			BEGIN_BCI((count + 1 + 1) / 2);
    657 			BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
    658 
    659 			for (i = 1; i + 1 < count; i += 2)
    660 				BCI_WRITE(idx[i + reorder[i % 3]] |
    661 					  (idx[i + 1 +
    662 					   reorder[(i + 1) % 3]] << 16));
    663 			if (i < count)
    664 				BCI_WRITE(idx[i + reorder[i % 3]]);
    665 		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    666 			BEGIN_BCI((count + 1 + 1) / 2);
    667 			BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
    668 
    669 			for (i = 1; i + 1 < count; i += 2)
    670 				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
    671 			if (i < count)
    672 				BCI_WRITE(idx[i]);
    673 		} else {
    674 			BEGIN_BCI((count + 2 + 1) / 2);
    675 			BCI_DRAW_INDICES_S4(count, prim, skip);
    676 
    677 			for (i = 0; i + 1 < count; i += 2)
    678 				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
    679 			if (i < count)
    680 				BCI_WRITE(idx[i]);
    681 		}
    682 
    683 		idx += count;
    684 		n -= count;
    685 
    686 		prim |= BCI_CMD_DRAW_CONT;
    687 	}
    688 
    689 	return 0;
    690 }
    691 
    692 static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,
    693 				  const drm_savage_cmd_header_t * cmd_header,
    694 				  const uint16_t *idx,
    695 				  const uint32_t *vtxbuf,
    696 				  unsigned int vb_size, unsigned int vb_stride)
    697 {
    698 	unsigned char reorder = 0;
    699 	unsigned int prim = cmd_header->idx.prim;
    700 	unsigned int skip = cmd_header->idx.skip;
    701 	unsigned int n = cmd_header->idx.count;
    702 	unsigned int vtx_size;
    703 	unsigned int i;
    704 	DMA_LOCALS;
    705 
    706 	if (!n)
    707 		return 0;
    708 
    709 	switch (prim) {
    710 	case SAVAGE_PRIM_TRILIST_201:
    711 		reorder = 1;
    712 		prim = SAVAGE_PRIM_TRILIST;
    713 		/* fall through */
    714 	case SAVAGE_PRIM_TRILIST:
    715 		if (n % 3 != 0) {
    716 			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
    717 			return -EINVAL;
    718 		}
    719 		break;
    720 	case SAVAGE_PRIM_TRISTRIP:
    721 	case SAVAGE_PRIM_TRIFAN:
    722 		if (n < 3) {
    723 			DRM_ERROR
    724 			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
    725 			return -EINVAL;
    726 		}
    727 		break;
    728 	default:
    729 		DRM_ERROR("invalid primitive type %u\n", prim);
    730 		return -EINVAL;
    731 	}
    732 
    733 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    734 		if (skip > SAVAGE_SKIP_ALL_S3D) {
    735 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    736 			return -EINVAL;
    737 		}
    738 		vtx_size = 8;	/* full vertex */
    739 	} else {
    740 		if (skip > SAVAGE_SKIP_ALL_S4) {
    741 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    742 			return -EINVAL;
    743 		}
    744 		vtx_size = 10;	/* full vertex */
    745 	}
    746 
    747 	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
    748 	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
    749 	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
    750 
    751 	if (vtx_size > vb_stride) {
    752 		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
    753 			  vtx_size, vb_stride);
    754 		return -EINVAL;
    755 	}
    756 
    757 	prim <<= 25;
    758 	while (n != 0) {
    759 		/* Can emit up to 255 vertices (85 triangles) at once. */
    760 		unsigned int count = n > 255 ? 255 : n;
    761 
    762 		/* Check indices */
    763 		for (i = 0; i < count; ++i) {
    764 			if (idx[i] > vb_size / (vb_stride * 4)) {
    765 				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
    766 					  i, idx[i], vb_size / (vb_stride * 4));
    767 				return -EINVAL;
    768 			}
    769 		}
    770 
    771 		if (reorder) {
    772 			/* Need to reorder vertices for correct flat
    773 			 * shading while preserving the clock sense
    774 			 * for correct culling. Only on Savage3D. */
    775 			int reorder[3] = { 2, -1, -1 };
    776 
    777 			BEGIN_DMA(count * vtx_size + 1);
    778 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    779 
    780 			for (i = 0; i < count; ++i) {
    781 				unsigned int j = idx[i + reorder[i % 3]];
    782 				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
    783 			}
    784 
    785 			DMA_COMMIT();
    786 		} else {
    787 			BEGIN_DMA(count * vtx_size + 1);
    788 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    789 
    790 			for (i = 0; i < count; ++i) {
    791 				unsigned int j = idx[i];
    792 				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
    793 			}
    794 
    795 			DMA_COMMIT();
    796 		}
    797 
    798 		idx += count;
    799 		n -= count;
    800 
    801 		prim |= BCI_CMD_DRAW_CONT;
    802 	}
    803 
    804 	return 0;
    805 }
    806 
    807 static int savage_dispatch_clear(drm_savage_private_t * dev_priv,
    808 				 const drm_savage_cmd_header_t * cmd_header,
    809 				 const drm_savage_cmd_header_t *data,
    810 				 unsigned int nbox,
    811 				 const struct drm_clip_rect *boxes)
    812 {
    813 	unsigned int flags = cmd_header->clear0.flags;
    814 	unsigned int clear_cmd;
    815 	unsigned int i, nbufs;
    816 	DMA_LOCALS;
    817 
    818 	if (nbox == 0)
    819 		return 0;
    820 
    821 	clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
    822 	    BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
    823 	BCI_CMD_SET_ROP(clear_cmd, 0xCC);
    824 
    825 	nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
    826 	    ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
    827 	if (nbufs == 0)
    828 		return 0;
    829 
    830 	if (data->clear1.mask != 0xffffffff) {
    831 		/* set mask */
    832 		BEGIN_DMA(2);
    833 		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
    834 		DMA_WRITE(data->clear1.mask);
    835 		DMA_COMMIT();
    836 	}
    837 	for (i = 0; i < nbox; ++i) {
    838 		unsigned int x, y, w, h;
    839 		unsigned int buf;
    840 		x = boxes[i].x1, y = boxes[i].y1;
    841 		w = boxes[i].x2 - boxes[i].x1;
    842 		h = boxes[i].y2 - boxes[i].y1;
    843 		BEGIN_DMA(nbufs * 6);
    844 		for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
    845 			if (!(flags & buf))
    846 				continue;
    847 			DMA_WRITE(clear_cmd);
    848 			switch (buf) {
    849 			case SAVAGE_FRONT:
    850 				DMA_WRITE(dev_priv->front_offset);
    851 				DMA_WRITE(dev_priv->front_bd);
    852 				break;
    853 			case SAVAGE_BACK:
    854 				DMA_WRITE(dev_priv->back_offset);
    855 				DMA_WRITE(dev_priv->back_bd);
    856 				break;
    857 			case SAVAGE_DEPTH:
    858 				DMA_WRITE(dev_priv->depth_offset);
    859 				DMA_WRITE(dev_priv->depth_bd);
    860 				break;
    861 			}
    862 			DMA_WRITE(data->clear1.value);
    863 			DMA_WRITE(BCI_X_Y(x, y));
    864 			DMA_WRITE(BCI_W_H(w, h));
    865 		}
    866 		DMA_COMMIT();
    867 	}
    868 	if (data->clear1.mask != 0xffffffff) {
    869 		/* reset mask */
    870 		BEGIN_DMA(2);
    871 		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
    872 		DMA_WRITE(0xffffffff);
    873 		DMA_COMMIT();
    874 	}
    875 
    876 	return 0;
    877 }
    878 
    879 static int savage_dispatch_swap(drm_savage_private_t * dev_priv,
    880 				unsigned int nbox, const struct drm_clip_rect *boxes)
    881 {
    882 	unsigned int swap_cmd;
    883 	unsigned int i;
    884 	DMA_LOCALS;
    885 
    886 	if (nbox == 0)
    887 		return 0;
    888 
    889 	swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
    890 	    BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
    891 	BCI_CMD_SET_ROP(swap_cmd, 0xCC);
    892 
    893 	for (i = 0; i < nbox; ++i) {
    894 		BEGIN_DMA(6);
    895 		DMA_WRITE(swap_cmd);
    896 		DMA_WRITE(dev_priv->back_offset);
    897 		DMA_WRITE(dev_priv->back_bd);
    898 		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
    899 		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
    900 		DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
    901 				  boxes[i].y2 - boxes[i].y1));
    902 		DMA_COMMIT();
    903 	}
    904 
    905 	return 0;
    906 }
    907 
    908 static int savage_dispatch_draw(drm_savage_private_t * dev_priv,
    909 				const drm_savage_cmd_header_t *start,
    910 				const drm_savage_cmd_header_t *end,
    911 				const struct drm_buf * dmabuf,
    912 				const unsigned int *vtxbuf,
    913 				unsigned int vb_size, unsigned int vb_stride,
    914 				unsigned int nbox,
    915 				const struct drm_clip_rect *boxes)
    916 {
    917 	unsigned int i, j;
    918 	int ret;
    919 
    920 	for (i = 0; i < nbox; ++i) {
    921 		const drm_savage_cmd_header_t *cmdbuf;
    922 		dev_priv->emit_clip_rect(dev_priv, &boxes[i]);
    923 
    924 		cmdbuf = start;
    925 		while (cmdbuf < end) {
    926 			drm_savage_cmd_header_t cmd_header;
    927 			cmd_header = *cmdbuf;
    928 			cmdbuf++;
    929 			switch (cmd_header.cmd.cmd) {
    930 			case SAVAGE_CMD_DMA_PRIM:
    931 				ret = savage_dispatch_dma_prim(
    932 					dev_priv, &cmd_header, dmabuf);
    933 				break;
    934 			case SAVAGE_CMD_VB_PRIM:
    935 				ret = savage_dispatch_vb_prim(
    936 					dev_priv, &cmd_header,
    937 					vtxbuf, vb_size, vb_stride);
    938 				break;
    939 			case SAVAGE_CMD_DMA_IDX:
    940 				j = (cmd_header.idx.count + 3) / 4;
    941 				/* j was check in savage_bci_cmdbuf */
    942 				ret = savage_dispatch_dma_idx(dev_priv,
    943 					&cmd_header, (const uint16_t *)cmdbuf,
    944 					dmabuf);
    945 				cmdbuf += j;
    946 				break;
    947 			case SAVAGE_CMD_VB_IDX:
    948 				j = (cmd_header.idx.count + 3) / 4;
    949 				/* j was check in savage_bci_cmdbuf */
    950 				ret = savage_dispatch_vb_idx(dev_priv,
    951 					&cmd_header, (const uint16_t *)cmdbuf,
    952 					(const uint32_t *)vtxbuf, vb_size,
    953 					vb_stride);
    954 				cmdbuf += j;
    955 				break;
    956 			default:
    957 				/* What's the best return code? EFAULT? */
    958 				DRM_ERROR("IMPLEMENTATION ERROR: "
    959 					  "non-drawing-command %d\n",
    960 					  cmd_header.cmd.cmd);
    961 				return -EINVAL;
    962 			}
    963 
    964 			if (ret != 0)
    965 				return ret;
    966 		}
    967 	}
    968 
    969 	return 0;
    970 }
    971 
    972 int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
    973 {
    974 	drm_savage_private_t *dev_priv = dev->dev_private;
    975 	struct drm_device_dma *dma = dev->dma;
    976 	struct drm_buf *dmabuf;
    977 	drm_savage_cmdbuf_t *cmdbuf = data;
    978 	drm_savage_cmd_header_t *kcmd_addr = NULL;
    979 	drm_savage_cmd_header_t *first_draw_cmd;
    980 	unsigned int *kvb_addr = NULL;
    981 	struct drm_clip_rect *kbox_addr = NULL;
    982 	unsigned int i, j;
    983 	int ret = 0;
    984 
    985 	DRM_DEBUG("\n");
    986 
    987 	LOCK_TEST_WITH_RETURN(dev, file_priv);
    988 
    989 	if (dma && dma->buflist) {
    990 		if (cmdbuf->dma_idx >= dma->buf_count) {
    991 			DRM_ERROR
    992 			    ("vertex buffer index %u out of range (0-%u)\n",
    993 			     cmdbuf->dma_idx, dma->buf_count - 1);
    994 			return -EINVAL;
    995 		}
    996 		dmabuf = dma->buflist[cmdbuf->dma_idx];
    997 	} else {
    998 		dmabuf = NULL;
    999 	}
   1000 
   1001 	/* Copy the user buffers into kernel temporary areas.  This hasn't been
   1002 	 * a performance loss compared to VERIFYAREA_READ/
   1003 	 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
   1004 	 * for locking on FreeBSD.
   1005 	 */
   1006 	if (cmdbuf->size) {
   1007 		kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL);
   1008 		if (kcmd_addr == NULL)
   1009 			return -ENOMEM;
   1010 
   1011 		if (copy_from_user(kcmd_addr, cmdbuf->cmd_addr,
   1012 				       cmdbuf->size * 8))
   1013 		{
   1014 			kfree(kcmd_addr);
   1015 			return -EFAULT;
   1016 		}
   1017 		cmdbuf->cmd_addr = kcmd_addr;
   1018 	}
   1019 	if (cmdbuf->vb_size) {
   1020 		kvb_addr = memdup_user(cmdbuf->vb_addr, cmdbuf->vb_size);
   1021 		if (IS_ERR(kvb_addr)) {
   1022 			ret = PTR_ERR(kvb_addr);
   1023 			kvb_addr = NULL;
   1024 			goto done;
   1025 		}
   1026 		cmdbuf->vb_addr = kvb_addr;
   1027 	}
   1028 	if (cmdbuf->nbox) {
   1029 		kbox_addr = kmalloc_array(cmdbuf->nbox, sizeof(struct drm_clip_rect),
   1030 					  GFP_KERNEL);
   1031 		if (kbox_addr == NULL) {
   1032 			ret = -ENOMEM;
   1033 			goto done;
   1034 		}
   1035 
   1036 		if (copy_from_user(kbox_addr, cmdbuf->box_addr,
   1037 				       cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
   1038 			ret = -EFAULT;
   1039 			goto done;
   1040 		}
   1041 	cmdbuf->box_addr = kbox_addr;
   1042 	}
   1043 
   1044 	/* Make sure writes to DMA buffers are finished before sending
   1045 	 * DMA commands to the graphics hardware. */
   1046 	mb();
   1047 
   1048 	/* Coming from user space. Don't know if the Xserver has
   1049 	 * emitted wait commands. Assuming the worst. */
   1050 	dev_priv->waiting = 1;
   1051 
   1052 	i = 0;
   1053 	first_draw_cmd = NULL;
   1054 	while (i < cmdbuf->size) {
   1055 		drm_savage_cmd_header_t cmd_header;
   1056 		cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
   1057 		cmdbuf->cmd_addr++;
   1058 		i++;
   1059 
   1060 		/* Group drawing commands with same state to minimize
   1061 		 * iterations over clip rects. */
   1062 		j = 0;
   1063 		switch (cmd_header.cmd.cmd) {
   1064 		case SAVAGE_CMD_DMA_IDX:
   1065 		case SAVAGE_CMD_VB_IDX:
   1066 			j = (cmd_header.idx.count + 3) / 4;
   1067 			if (i + j > cmdbuf->size) {
   1068 				DRM_ERROR("indexed drawing command extends "
   1069 					  "beyond end of command buffer\n");
   1070 				DMA_FLUSH();
   1071 				ret = -EINVAL;
   1072 				goto done;
   1073 			}
   1074 			/* fall through */
   1075 		case SAVAGE_CMD_DMA_PRIM:
   1076 		case SAVAGE_CMD_VB_PRIM:
   1077 			if (!first_draw_cmd)
   1078 				first_draw_cmd = cmdbuf->cmd_addr - 1;
   1079 			cmdbuf->cmd_addr += j;
   1080 			i += j;
   1081 			break;
   1082 		default:
   1083 			if (first_draw_cmd) {
   1084 				ret = savage_dispatch_draw(
   1085 				      dev_priv, first_draw_cmd,
   1086 				      cmdbuf->cmd_addr - 1,
   1087 				      dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
   1088 				      cmdbuf->vb_stride,
   1089 				      cmdbuf->nbox, cmdbuf->box_addr);
   1090 				if (ret != 0)
   1091 					goto done;
   1092 				first_draw_cmd = NULL;
   1093 			}
   1094 		}
   1095 		if (first_draw_cmd)
   1096 			continue;
   1097 
   1098 		switch (cmd_header.cmd.cmd) {
   1099 		case SAVAGE_CMD_STATE:
   1100 			j = (cmd_header.state.count + 1) / 2;
   1101 			if (i + j > cmdbuf->size) {
   1102 				DRM_ERROR("command SAVAGE_CMD_STATE extends "
   1103 					  "beyond end of command buffer\n");
   1104 				DMA_FLUSH();
   1105 				ret = -EINVAL;
   1106 				goto done;
   1107 			}
   1108 			ret = savage_dispatch_state(dev_priv, &cmd_header,
   1109 				(const uint32_t *)cmdbuf->cmd_addr);
   1110 			cmdbuf->cmd_addr += j;
   1111 			i += j;
   1112 			break;
   1113 		case SAVAGE_CMD_CLEAR:
   1114 			if (i + 1 > cmdbuf->size) {
   1115 				DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
   1116 					  "beyond end of command buffer\n");
   1117 				DMA_FLUSH();
   1118 				ret = -EINVAL;
   1119 				goto done;
   1120 			}
   1121 			ret = savage_dispatch_clear(dev_priv, &cmd_header,
   1122 						    cmdbuf->cmd_addr,
   1123 						    cmdbuf->nbox,
   1124 						    cmdbuf->box_addr);
   1125 			cmdbuf->cmd_addr++;
   1126 			i++;
   1127 			break;
   1128 		case SAVAGE_CMD_SWAP:
   1129 			ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
   1130 						   cmdbuf->box_addr);
   1131 			break;
   1132 		default:
   1133 			DRM_ERROR("invalid command 0x%x\n",
   1134 				  cmd_header.cmd.cmd);
   1135 			DMA_FLUSH();
   1136 			ret = -EINVAL;
   1137 			goto done;
   1138 		}
   1139 
   1140 		if (ret != 0) {
   1141 			DMA_FLUSH();
   1142 			goto done;
   1143 		}
   1144 	}
   1145 
   1146 	if (first_draw_cmd) {
   1147 		ret = savage_dispatch_draw (
   1148 			dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
   1149 			cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
   1150 			cmdbuf->nbox, cmdbuf->box_addr);
   1151 		if (ret != 0) {
   1152 			DMA_FLUSH();
   1153 			goto done;
   1154 		}
   1155 	}
   1156 
   1157 	DMA_FLUSH();
   1158 
   1159 	if (dmabuf && cmdbuf->discard) {
   1160 		drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
   1161 		uint16_t event;
   1162 		event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
   1163 		SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
   1164 		savage_freelist_put(dev, dmabuf);
   1165 	}
   1166 
   1167 done:
   1168 	/* If we didn't need to allocate them, these'll be NULL */
   1169 	kfree(kcmd_addr);
   1170 	kfree(kvb_addr);
   1171 	kfree(kbox_addr);
   1172 
   1173 	return ret;
   1174 }
   1175