Home | History | Annotate | Line # | Download | only in savage
savage_state.c revision 1.1.1.2.28.1
      1 /*	$NetBSD: savage_state.c,v 1.1.1.2.28.1 2018/09/06 06:56:33 pgoyette Exp $	*/
      2 
      3 /* savage_state.c -- State and drawing support for Savage
      4  *
      5  * Copyright 2004  Felix Kuehling
      6  * All Rights Reserved.
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the "Software"),
     10  * to deal in the Software without restriction, including without limitation
     11  * the rights to use, copy, modify, merge, publish, distribute, sub license,
     12  * and/or sell copies of the Software, and to permit persons to whom the
     13  * Software is furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial portions
     17  * of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     22  * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
     23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
     24  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  */
     27 #include <sys/cdefs.h>
     28 __KERNEL_RCSID(0, "$NetBSD: savage_state.c,v 1.1.1.2.28.1 2018/09/06 06:56:33 pgoyette Exp $");
     29 
     30 #include <drm/drmP.h>
     31 #include <drm/savage_drm.h>
     32 #include "savage_drv.h"
     33 
     34 void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
     35 			       const struct drm_clip_rect * pbox)
     36 {
     37 	uint32_t scstart = dev_priv->state.s3d.new_scstart;
     38 	uint32_t scend = dev_priv->state.s3d.new_scend;
     39 	scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
     40 	    ((uint32_t) pbox->x1 & 0x000007ff) |
     41 	    (((uint32_t) pbox->y1 << 16) & 0x07ff0000);
     42 	scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
     43 	    (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
     44 	    ((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);
     45 	if (scstart != dev_priv->state.s3d.scstart ||
     46 	    scend != dev_priv->state.s3d.scend) {
     47 		DMA_LOCALS;
     48 		BEGIN_DMA(4);
     49 		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
     50 		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
     51 		DMA_WRITE(scstart);
     52 		DMA_WRITE(scend);
     53 		dev_priv->state.s3d.scstart = scstart;
     54 		dev_priv->state.s3d.scend = scend;
     55 		dev_priv->waiting = 1;
     56 		DMA_COMMIT();
     57 	}
     58 }
     59 
     60 void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
     61 			      const struct drm_clip_rect * pbox)
     62 {
     63 	uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
     64 	uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
     65 	drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
     66 	    ((uint32_t) pbox->x1 & 0x000007ff) |
     67 	    (((uint32_t) pbox->y1 << 12) & 0x00fff000);
     68 	drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
     69 	    (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
     70 	    ((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);
     71 	if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
     72 	    drawctrl1 != dev_priv->state.s4.drawctrl1) {
     73 		DMA_LOCALS;
     74 		BEGIN_DMA(4);
     75 		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
     76 		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
     77 		DMA_WRITE(drawctrl0);
     78 		DMA_WRITE(drawctrl1);
     79 		dev_priv->state.s4.drawctrl0 = drawctrl0;
     80 		dev_priv->state.s4.drawctrl1 = drawctrl1;
     81 		dev_priv->waiting = 1;
     82 		DMA_COMMIT();
     83 	}
     84 }
     85 
     86 static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,
     87 				 uint32_t addr)
     88 {
     89 	if ((addr & 6) != 2) {	/* reserved bits */
     90 		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
     91 		return -EINVAL;
     92 	}
     93 	if (!(addr & 1)) {	/* local */
     94 		addr &= ~7;
     95 		if (addr < dev_priv->texture_offset ||
     96 		    addr >= dev_priv->texture_offset + dev_priv->texture_size) {
     97 			DRM_ERROR
     98 			    ("bad texAddr%d %08x (local addr out of range)\n",
     99 			     unit, addr);
    100 			return -EINVAL;
    101 		}
    102 	} else {		/* AGP */
    103 		if (!dev_priv->agp_textures) {
    104 			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
    105 				  unit, addr);
    106 			return -EINVAL;
    107 		}
    108 		addr &= ~7;
    109 		if (addr < dev_priv->agp_textures->offset ||
    110 		    addr >= (dev_priv->agp_textures->offset +
    111 			     dev_priv->agp_textures->size)) {
    112 			DRM_ERROR
    113 			    ("bad texAddr%d %08x (AGP addr out of range)\n",
    114 			     unit, addr);
    115 			return -EINVAL;
    116 		}
    117 	}
    118 	return 0;
    119 }
    120 
/*
 * Helpers for the savage_verify_state_*() functions.  Both expect
 * `dev_priv`, `start`, `count` and `regs` to be in scope at the point of
 * expansion, where regs[0] holds the value for register `start`.
 *
 * SAVE_STATE(reg, where): if `reg` lies in [start, start + count), copy its
 * new value into dev_priv->state.where.
 *
 * SAVE_STATE_MASK(reg, where, mask): same, but only the bits selected by
 * `mask` are taken from the new value; the other bits of
 * dev_priv->state.where are preserved.
 *
 * Both are wrapped in do { } while (0) so that they behave as a single
 * statement (in particular, a following `else` is not captured).
 */
#define SAVE_STATE(reg,where) do {				\
	if (start <= (reg) && start + count > (reg))		\
		dev_priv->state.where = regs[(reg) - start];	\
} while (0)
#define SAVE_STATE_MASK(reg,where,mask) do {			\
	if (start <= (reg) && start + count > (reg)) {		\
		uint32_t tmp;					\
		tmp = regs[(reg) - start];			\
		dev_priv->state.where = (tmp & (mask)) |	\
			(dev_priv->state.where & ~(mask));	\
	}							\
} while (0)
    132 
    133 static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,
    134 				   unsigned int start, unsigned int count,
    135 				   const uint32_t *regs)
    136 {
    137 	if (start < SAVAGE_TEXPALADDR_S3D ||
    138 	    start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
    139 		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
    140 			  start, start + count - 1);
    141 		return -EINVAL;
    142 	}
    143 
    144 	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
    145 			~SAVAGE_SCISSOR_MASK_S3D);
    146 	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
    147 			~SAVAGE_SCISSOR_MASK_S3D);
    148 
    149 	/* if any texture regs were changed ... */
    150 	if (start <= SAVAGE_TEXCTRL_S3D &&
    151 	    start + count > SAVAGE_TEXPALADDR_S3D) {
    152 		/* ... check texture state */
    153 		SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
    154 		SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
    155 		if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
    156 			return savage_verify_texaddr(dev_priv, 0,
    157 						dev_priv->state.s3d.texaddr);
    158 	}
    159 
    160 	return 0;
    161 }
    162 
    163 static int savage_verify_state_s4(drm_savage_private_t * dev_priv,
    164 				  unsigned int start, unsigned int count,
    165 				  const uint32_t *regs)
    166 {
    167 	int ret = 0;
    168 
    169 	if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
    170 	    start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
    171 		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
    172 			  start, start + count - 1);
    173 		return -EINVAL;
    174 	}
    175 
    176 	SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
    177 			~SAVAGE_SCISSOR_MASK_S4);
    178 	SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
    179 			~SAVAGE_SCISSOR_MASK_S4);
    180 
    181 	/* if any texture regs were changed ... */
    182 	if (start <= SAVAGE_TEXDESCR_S4 &&
    183 	    start + count > SAVAGE_TEXPALADDR_S4) {
    184 		/* ... check texture state */
    185 		SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
    186 		SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
    187 		SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
    188 		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
    189 			ret |= savage_verify_texaddr(dev_priv, 0,
    190 						dev_priv->state.s4.texaddr0);
    191 		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
    192 			ret |= savage_verify_texaddr(dev_priv, 1,
    193 						dev_priv->state.s4.texaddr1);
    194 	}
    195 
    196 	return ret;
    197 }
    198 
    199 #undef SAVE_STATE
    200 #undef SAVE_STATE_MASK
    201 
    202 static int savage_dispatch_state(drm_savage_private_t * dev_priv,
    203 				 const drm_savage_cmd_header_t * cmd_header,
    204 				 const uint32_t *regs)
    205 {
    206 	unsigned int count = cmd_header->state.count;
    207 	unsigned int start = cmd_header->state.start;
    208 	unsigned int count2 = 0;
    209 	unsigned int bci_size;
    210 	int ret;
    211 	DMA_LOCALS;
    212 
    213 	if (!count)
    214 		return 0;
    215 
    216 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    217 		ret = savage_verify_state_s3d(dev_priv, start, count, regs);
    218 		if (ret != 0)
    219 			return ret;
    220 		/* scissor regs are emitted in savage_dispatch_draw */
    221 		if (start < SAVAGE_SCSTART_S3D) {
    222 			if (start + count > SAVAGE_SCEND_S3D + 1)
    223 				count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
    224 			if (start + count > SAVAGE_SCSTART_S3D)
    225 				count = SAVAGE_SCSTART_S3D - start;
    226 		} else if (start <= SAVAGE_SCEND_S3D) {
    227 			if (start + count > SAVAGE_SCEND_S3D + 1) {
    228 				count -= SAVAGE_SCEND_S3D + 1 - start;
    229 				start = SAVAGE_SCEND_S3D + 1;
    230 			} else
    231 				return 0;
    232 		}
    233 	} else {
    234 		ret = savage_verify_state_s4(dev_priv, start, count, regs);
    235 		if (ret != 0)
    236 			return ret;
    237 		/* scissor regs are emitted in savage_dispatch_draw */
    238 		if (start < SAVAGE_DRAWCTRL0_S4) {
    239 			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
    240 				count2 = count -
    241 					 (SAVAGE_DRAWCTRL1_S4 + 1 - start);
    242 			if (start + count > SAVAGE_DRAWCTRL0_S4)
    243 				count = SAVAGE_DRAWCTRL0_S4 - start;
    244 		} else if (start <= SAVAGE_DRAWCTRL1_S4) {
    245 			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
    246 				count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
    247 				start = SAVAGE_DRAWCTRL1_S4 + 1;
    248 			} else
    249 				return 0;
    250 		}
    251 	}
    252 
    253 	bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;
    254 
    255 	if (cmd_header->state.global) {
    256 		BEGIN_DMA(bci_size + 1);
    257 		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
    258 		dev_priv->waiting = 1;
    259 	} else {
    260 		BEGIN_DMA(bci_size);
    261 	}
    262 
    263 	do {
    264 		while (count > 0) {
    265 			unsigned int n = count < 255 ? count : 255;
    266 			DMA_SET_REGISTERS(start, n);
    267 			DMA_COPY(regs, n);
    268 			count -= n;
    269 			start += n;
    270 			regs += n;
    271 		}
    272 		start += 2;
    273 		regs += 2;
    274 		count = count2;
    275 		count2 = 0;
    276 	} while (count);
    277 
    278 	DMA_COMMIT();
    279 
    280 	return 0;
    281 }
    282 
    283 static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,
    284 				    const drm_savage_cmd_header_t * cmd_header,
    285 				    const struct drm_buf * dmabuf)
    286 {
    287 	unsigned char reorder = 0;
    288 	unsigned int prim = cmd_header->prim.prim;
    289 	unsigned int skip = cmd_header->prim.skip;
    290 	unsigned int n = cmd_header->prim.count;
    291 	unsigned int start = cmd_header->prim.start;
    292 	unsigned int i;
    293 	BCI_LOCALS;
    294 
    295 	if (!dmabuf) {
    296 		DRM_ERROR("called without dma buffers!\n");
    297 		return -EINVAL;
    298 	}
    299 
    300 	if (!n)
    301 		return 0;
    302 
    303 	switch (prim) {
    304 	case SAVAGE_PRIM_TRILIST_201:
    305 		reorder = 1;
    306 		prim = SAVAGE_PRIM_TRILIST;
    307 	case SAVAGE_PRIM_TRILIST:
    308 		if (n % 3 != 0) {
    309 			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
    310 				  n);
    311 			return -EINVAL;
    312 		}
    313 		break;
    314 	case SAVAGE_PRIM_TRISTRIP:
    315 	case SAVAGE_PRIM_TRIFAN:
    316 		if (n < 3) {
    317 			DRM_ERROR
    318 			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
    319 			     n);
    320 			return -EINVAL;
    321 		}
    322 		break;
    323 	default:
    324 		DRM_ERROR("invalid primitive type %u\n", prim);
    325 		return -EINVAL;
    326 	}
    327 
    328 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    329 		if (skip != 0) {
    330 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    331 			return -EINVAL;
    332 		}
    333 	} else {
    334 		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
    335 		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
    336 		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
    337 		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
    338 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    339 			return -EINVAL;
    340 		}
    341 		if (reorder) {
    342 			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
    343 			return -EINVAL;
    344 		}
    345 	}
    346 
    347 	if (start + n > dmabuf->total / 32) {
    348 		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
    349 			  start, start + n - 1, dmabuf->total / 32);
    350 		return -EINVAL;
    351 	}
    352 
    353 	/* Vertex DMA doesn't work with command DMA at the same time,
    354 	 * so we use BCI_... to submit commands here. Flush buffered
    355 	 * faked DMA first. */
    356 	DMA_FLUSH();
    357 
    358 	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
    359 		BEGIN_BCI(2);
    360 		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
    361 		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
    362 		dev_priv->state.common.vbaddr = dmabuf->bus_address;
    363 	}
    364 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
    365 		/* Workaround for what looks like a hardware bug. If a
    366 		 * WAIT_3D_IDLE was emitted some time before the
    367 		 * indexed drawing command then the engine will lock
    368 		 * up. There are two known workarounds:
    369 		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
    370 		BEGIN_BCI(63);
    371 		for (i = 0; i < 63; ++i)
    372 			BCI_WRITE(BCI_CMD_WAIT);
    373 		dev_priv->waiting = 0;
    374 	}
    375 
    376 	prim <<= 25;
    377 	while (n != 0) {
    378 		/* Can emit up to 255 indices (85 triangles) at once. */
    379 		unsigned int count = n > 255 ? 255 : n;
    380 		if (reorder) {
    381 			/* Need to reorder indices for correct flat
    382 			 * shading while preserving the clock sense
    383 			 * for correct culling. Only on Savage3D. */
    384 			int reorder[3] = { -1, -1, -1 };
    385 			reorder[start % 3] = 2;
    386 
    387 			BEGIN_BCI((count + 1 + 1) / 2);
    388 			BCI_DRAW_INDICES_S3D(count, prim, start + 2);
    389 
    390 			for (i = start + 1; i + 1 < start + count; i += 2)
    391 				BCI_WRITE((i + reorder[i % 3]) |
    392 					  ((i + 1 +
    393 					    reorder[(i + 1) % 3]) << 16));
    394 			if (i < start + count)
    395 				BCI_WRITE(i + reorder[i % 3]);
    396 		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    397 			BEGIN_BCI((count + 1 + 1) / 2);
    398 			BCI_DRAW_INDICES_S3D(count, prim, start);
    399 
    400 			for (i = start + 1; i + 1 < start + count; i += 2)
    401 				BCI_WRITE(i | ((i + 1) << 16));
    402 			if (i < start + count)
    403 				BCI_WRITE(i);
    404 		} else {
    405 			BEGIN_BCI((count + 2 + 1) / 2);
    406 			BCI_DRAW_INDICES_S4(count, prim, skip);
    407 
    408 			for (i = start; i + 1 < start + count; i += 2)
    409 				BCI_WRITE(i | ((i + 1) << 16));
    410 			if (i < start + count)
    411 				BCI_WRITE(i);
    412 		}
    413 
    414 		start += count;
    415 		n -= count;
    416 
    417 		prim |= BCI_CMD_DRAW_CONT;
    418 	}
    419 
    420 	return 0;
    421 }
    422 
    423 static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,
    424 				   const drm_savage_cmd_header_t * cmd_header,
    425 				   const uint32_t *vtxbuf, unsigned int vb_size,
    426 				   unsigned int vb_stride)
    427 {
    428 	unsigned char reorder = 0;
    429 	unsigned int prim = cmd_header->prim.prim;
    430 	unsigned int skip = cmd_header->prim.skip;
    431 	unsigned int n = cmd_header->prim.count;
    432 	unsigned int start = cmd_header->prim.start;
    433 	unsigned int vtx_size;
    434 	unsigned int i;
    435 	DMA_LOCALS;
    436 
    437 	if (!n)
    438 		return 0;
    439 
    440 	switch (prim) {
    441 	case SAVAGE_PRIM_TRILIST_201:
    442 		reorder = 1;
    443 		prim = SAVAGE_PRIM_TRILIST;
    444 	case SAVAGE_PRIM_TRILIST:
    445 		if (n % 3 != 0) {
    446 			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
    447 				  n);
    448 			return -EINVAL;
    449 		}
    450 		break;
    451 	case SAVAGE_PRIM_TRISTRIP:
    452 	case SAVAGE_PRIM_TRIFAN:
    453 		if (n < 3) {
    454 			DRM_ERROR
    455 			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
    456 			     n);
    457 			return -EINVAL;
    458 		}
    459 		break;
    460 	default:
    461 		DRM_ERROR("invalid primitive type %u\n", prim);
    462 		return -EINVAL;
    463 	}
    464 
    465 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    466 		if (skip > SAVAGE_SKIP_ALL_S3D) {
    467 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    468 			return -EINVAL;
    469 		}
    470 		vtx_size = 8;	/* full vertex */
    471 	} else {
    472 		if (skip > SAVAGE_SKIP_ALL_S4) {
    473 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    474 			return -EINVAL;
    475 		}
    476 		vtx_size = 10;	/* full vertex */
    477 	}
    478 
    479 	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
    480 	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
    481 	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
    482 
    483 	if (vtx_size > vb_stride) {
    484 		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
    485 			  vtx_size, vb_stride);
    486 		return -EINVAL;
    487 	}
    488 
    489 	if (start + n > vb_size / (vb_stride * 4)) {
    490 		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
    491 			  start, start + n - 1, vb_size / (vb_stride * 4));
    492 		return -EINVAL;
    493 	}
    494 
    495 	prim <<= 25;
    496 	while (n != 0) {
    497 		/* Can emit up to 255 vertices (85 triangles) at once. */
    498 		unsigned int count = n > 255 ? 255 : n;
    499 		if (reorder) {
    500 			/* Need to reorder vertices for correct flat
    501 			 * shading while preserving the clock sense
    502 			 * for correct culling. Only on Savage3D. */
    503 			int reorder[3] = { -1, -1, -1 };
    504 			reorder[start % 3] = 2;
    505 
    506 			BEGIN_DMA(count * vtx_size + 1);
    507 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    508 
    509 			for (i = start; i < start + count; ++i) {
    510 				unsigned int j = i + reorder[i % 3];
    511 				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
    512 			}
    513 
    514 			DMA_COMMIT();
    515 		} else {
    516 			BEGIN_DMA(count * vtx_size + 1);
    517 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    518 
    519 			if (vb_stride == vtx_size) {
    520 				DMA_COPY(&vtxbuf[vb_stride * start],
    521 					 vtx_size * count);
    522 			} else {
    523 				for (i = start; i < start + count; ++i) {
    524 					DMA_COPY(&vtxbuf [vb_stride * i],
    525 						 vtx_size);
    526 				}
    527 			}
    528 
    529 			DMA_COMMIT();
    530 		}
    531 
    532 		start += count;
    533 		n -= count;
    534 
    535 		prim |= BCI_CMD_DRAW_CONT;
    536 	}
    537 
    538 	return 0;
    539 }
    540 
    541 static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,
    542 				   const drm_savage_cmd_header_t * cmd_header,
    543 				   const uint16_t *idx,
    544 				   const struct drm_buf * dmabuf)
    545 {
    546 	unsigned char reorder = 0;
    547 	unsigned int prim = cmd_header->idx.prim;
    548 	unsigned int skip = cmd_header->idx.skip;
    549 	unsigned int n = cmd_header->idx.count;
    550 	unsigned int i;
    551 	BCI_LOCALS;
    552 
    553 	if (!dmabuf) {
    554 		DRM_ERROR("called without dma buffers!\n");
    555 		return -EINVAL;
    556 	}
    557 
    558 	if (!n)
    559 		return 0;
    560 
    561 	switch (prim) {
    562 	case SAVAGE_PRIM_TRILIST_201:
    563 		reorder = 1;
    564 		prim = SAVAGE_PRIM_TRILIST;
    565 	case SAVAGE_PRIM_TRILIST:
    566 		if (n % 3 != 0) {
    567 			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
    568 			return -EINVAL;
    569 		}
    570 		break;
    571 	case SAVAGE_PRIM_TRISTRIP:
    572 	case SAVAGE_PRIM_TRIFAN:
    573 		if (n < 3) {
    574 			DRM_ERROR
    575 			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
    576 			return -EINVAL;
    577 		}
    578 		break;
    579 	default:
    580 		DRM_ERROR("invalid primitive type %u\n", prim);
    581 		return -EINVAL;
    582 	}
    583 
    584 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    585 		if (skip != 0) {
    586 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    587 			return -EINVAL;
    588 		}
    589 	} else {
    590 		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
    591 		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
    592 		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
    593 		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
    594 			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
    595 			return -EINVAL;
    596 		}
    597 		if (reorder) {
    598 			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
    599 			return -EINVAL;
    600 		}
    601 	}
    602 
    603 	/* Vertex DMA doesn't work with command DMA at the same time,
    604 	 * so we use BCI_... to submit commands here. Flush buffered
    605 	 * faked DMA first. */
    606 	DMA_FLUSH();
    607 
    608 	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
    609 		BEGIN_BCI(2);
    610 		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
    611 		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
    612 		dev_priv->state.common.vbaddr = dmabuf->bus_address;
    613 	}
    614 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
    615 		/* Workaround for what looks like a hardware bug. If a
    616 		 * WAIT_3D_IDLE was emitted some time before the
    617 		 * indexed drawing command then the engine will lock
    618 		 * up. There are two known workarounds:
    619 		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
    620 		BEGIN_BCI(63);
    621 		for (i = 0; i < 63; ++i)
    622 			BCI_WRITE(BCI_CMD_WAIT);
    623 		dev_priv->waiting = 0;
    624 	}
    625 
    626 	prim <<= 25;
    627 	while (n != 0) {
    628 		/* Can emit up to 255 indices (85 triangles) at once. */
    629 		unsigned int count = n > 255 ? 255 : n;
    630 
    631 		/* check indices */
    632 		for (i = 0; i < count; ++i) {
    633 			if (idx[i] > dmabuf->total / 32) {
    634 				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
    635 					  i, idx[i], dmabuf->total / 32);
    636 				return -EINVAL;
    637 			}
    638 		}
    639 
    640 		if (reorder) {
    641 			/* Need to reorder indices for correct flat
    642 			 * shading while preserving the clock sense
    643 			 * for correct culling. Only on Savage3D. */
    644 			int reorder[3] = { 2, -1, -1 };
    645 
    646 			BEGIN_BCI((count + 1 + 1) / 2);
    647 			BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
    648 
    649 			for (i = 1; i + 1 < count; i += 2)
    650 				BCI_WRITE(idx[i + reorder[i % 3]] |
    651 					  (idx[i + 1 +
    652 					   reorder[(i + 1) % 3]] << 16));
    653 			if (i < count)
    654 				BCI_WRITE(idx[i + reorder[i % 3]]);
    655 		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    656 			BEGIN_BCI((count + 1 + 1) / 2);
    657 			BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
    658 
    659 			for (i = 1; i + 1 < count; i += 2)
    660 				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
    661 			if (i < count)
    662 				BCI_WRITE(idx[i]);
    663 		} else {
    664 			BEGIN_BCI((count + 2 + 1) / 2);
    665 			BCI_DRAW_INDICES_S4(count, prim, skip);
    666 
    667 			for (i = 0; i + 1 < count; i += 2)
    668 				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
    669 			if (i < count)
    670 				BCI_WRITE(idx[i]);
    671 		}
    672 
    673 		idx += count;
    674 		n -= count;
    675 
    676 		prim |= BCI_CMD_DRAW_CONT;
    677 	}
    678 
    679 	return 0;
    680 }
    681 
    682 static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,
    683 				  const drm_savage_cmd_header_t * cmd_header,
    684 				  const uint16_t *idx,
    685 				  const uint32_t *vtxbuf,
    686 				  unsigned int vb_size, unsigned int vb_stride)
    687 {
    688 	unsigned char reorder = 0;
    689 	unsigned int prim = cmd_header->idx.prim;
    690 	unsigned int skip = cmd_header->idx.skip;
    691 	unsigned int n = cmd_header->idx.count;
    692 	unsigned int vtx_size;
    693 	unsigned int i;
    694 	DMA_LOCALS;
    695 
    696 	if (!n)
    697 		return 0;
    698 
    699 	switch (prim) {
    700 	case SAVAGE_PRIM_TRILIST_201:
    701 		reorder = 1;
    702 		prim = SAVAGE_PRIM_TRILIST;
    703 	case SAVAGE_PRIM_TRILIST:
    704 		if (n % 3 != 0) {
    705 			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
    706 			return -EINVAL;
    707 		}
    708 		break;
    709 	case SAVAGE_PRIM_TRISTRIP:
    710 	case SAVAGE_PRIM_TRIFAN:
    711 		if (n < 3) {
    712 			DRM_ERROR
    713 			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
    714 			return -EINVAL;
    715 		}
    716 		break;
    717 	default:
    718 		DRM_ERROR("invalid primitive type %u\n", prim);
    719 		return -EINVAL;
    720 	}
    721 
    722 	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
    723 		if (skip > SAVAGE_SKIP_ALL_S3D) {
    724 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    725 			return -EINVAL;
    726 		}
    727 		vtx_size = 8;	/* full vertex */
    728 	} else {
    729 		if (skip > SAVAGE_SKIP_ALL_S4) {
    730 			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
    731 			return -EINVAL;
    732 		}
    733 		vtx_size = 10;	/* full vertex */
    734 	}
    735 
    736 	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
    737 	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
    738 	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
    739 
    740 	if (vtx_size > vb_stride) {
    741 		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
    742 			  vtx_size, vb_stride);
    743 		return -EINVAL;
    744 	}
    745 
    746 	prim <<= 25;
    747 	while (n != 0) {
    748 		/* Can emit up to 255 vertices (85 triangles) at once. */
    749 		unsigned int count = n > 255 ? 255 : n;
    750 
    751 		/* Check indices */
    752 		for (i = 0; i < count; ++i) {
    753 			if (idx[i] > vb_size / (vb_stride * 4)) {
    754 				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
    755 					  i, idx[i], vb_size / (vb_stride * 4));
    756 				return -EINVAL;
    757 			}
    758 		}
    759 
    760 		if (reorder) {
    761 			/* Need to reorder vertices for correct flat
    762 			 * shading while preserving the clock sense
    763 			 * for correct culling. Only on Savage3D. */
    764 			int reorder[3] = { 2, -1, -1 };
    765 
    766 			BEGIN_DMA(count * vtx_size + 1);
    767 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    768 
    769 			for (i = 0; i < count; ++i) {
    770 				unsigned int j = idx[i + reorder[i % 3]];
    771 				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
    772 			}
    773 
    774 			DMA_COMMIT();
    775 		} else {
    776 			BEGIN_DMA(count * vtx_size + 1);
    777 			DMA_DRAW_PRIMITIVE(count, prim, skip);
    778 
    779 			for (i = 0; i < count; ++i) {
    780 				unsigned int j = idx[i];
    781 				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
    782 			}
    783 
    784 			DMA_COMMIT();
    785 		}
    786 
    787 		idx += count;
    788 		n -= count;
    789 
    790 		prim |= BCI_CMD_DRAW_CONT;
    791 	}
    792 
    793 	return 0;
    794 }
    795 
    796 static int savage_dispatch_clear(drm_savage_private_t * dev_priv,
    797 				 const drm_savage_cmd_header_t * cmd_header,
    798 				 const drm_savage_cmd_header_t *data,
    799 				 unsigned int nbox,
    800 				 const struct drm_clip_rect *boxes)
    801 {
    802 	unsigned int flags = cmd_header->clear0.flags;
    803 	unsigned int clear_cmd;
    804 	unsigned int i, nbufs;
    805 	DMA_LOCALS;
    806 
    807 	if (nbox == 0)
    808 		return 0;
    809 
    810 	clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
    811 	    BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
    812 	BCI_CMD_SET_ROP(clear_cmd, 0xCC);
    813 
    814 	nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
    815 	    ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
    816 	if (nbufs == 0)
    817 		return 0;
    818 
    819 	if (data->clear1.mask != 0xffffffff) {
    820 		/* set mask */
    821 		BEGIN_DMA(2);
    822 		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
    823 		DMA_WRITE(data->clear1.mask);
    824 		DMA_COMMIT();
    825 	}
    826 	for (i = 0; i < nbox; ++i) {
    827 		unsigned int x, y, w, h;
    828 		unsigned int buf;
    829 		x = boxes[i].x1, y = boxes[i].y1;
    830 		w = boxes[i].x2 - boxes[i].x1;
    831 		h = boxes[i].y2 - boxes[i].y1;
    832 		BEGIN_DMA(nbufs * 6);
    833 		for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
    834 			if (!(flags & buf))
    835 				continue;
    836 			DMA_WRITE(clear_cmd);
    837 			switch (buf) {
    838 			case SAVAGE_FRONT:
    839 				DMA_WRITE(dev_priv->front_offset);
    840 				DMA_WRITE(dev_priv->front_bd);
    841 				break;
    842 			case SAVAGE_BACK:
    843 				DMA_WRITE(dev_priv->back_offset);
    844 				DMA_WRITE(dev_priv->back_bd);
    845 				break;
    846 			case SAVAGE_DEPTH:
    847 				DMA_WRITE(dev_priv->depth_offset);
    848 				DMA_WRITE(dev_priv->depth_bd);
    849 				break;
    850 			}
    851 			DMA_WRITE(data->clear1.value);
    852 			DMA_WRITE(BCI_X_Y(x, y));
    853 			DMA_WRITE(BCI_W_H(w, h));
    854 		}
    855 		DMA_COMMIT();
    856 	}
    857 	if (data->clear1.mask != 0xffffffff) {
    858 		/* reset mask */
    859 		BEGIN_DMA(2);
    860 		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
    861 		DMA_WRITE(0xffffffff);
    862 		DMA_COMMIT();
    863 	}
    864 
    865 	return 0;
    866 }
    867 
    868 static int savage_dispatch_swap(drm_savage_private_t * dev_priv,
    869 				unsigned int nbox, const struct drm_clip_rect *boxes)
    870 {
    871 	unsigned int swap_cmd;
    872 	unsigned int i;
    873 	DMA_LOCALS;
    874 
    875 	if (nbox == 0)
    876 		return 0;
    877 
    878 	swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
    879 	    BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
    880 	BCI_CMD_SET_ROP(swap_cmd, 0xCC);
    881 
    882 	for (i = 0; i < nbox; ++i) {
    883 		BEGIN_DMA(6);
    884 		DMA_WRITE(swap_cmd);
    885 		DMA_WRITE(dev_priv->back_offset);
    886 		DMA_WRITE(dev_priv->back_bd);
    887 		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
    888 		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
    889 		DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
    890 				  boxes[i].y2 - boxes[i].y1));
    891 		DMA_COMMIT();
    892 	}
    893 
    894 	return 0;
    895 }
    896 
    897 static int savage_dispatch_draw(drm_savage_private_t * dev_priv,
    898 				const drm_savage_cmd_header_t *start,
    899 				const drm_savage_cmd_header_t *end,
    900 				const struct drm_buf * dmabuf,
    901 				const unsigned int *vtxbuf,
    902 				unsigned int vb_size, unsigned int vb_stride,
    903 				unsigned int nbox,
    904 				const struct drm_clip_rect *boxes)
    905 {
    906 	unsigned int i, j;
    907 	int ret;
    908 
    909 	for (i = 0; i < nbox; ++i) {
    910 		const drm_savage_cmd_header_t *cmdbuf;
    911 		dev_priv->emit_clip_rect(dev_priv, &boxes[i]);
    912 
    913 		cmdbuf = start;
    914 		while (cmdbuf < end) {
    915 			drm_savage_cmd_header_t cmd_header;
    916 			cmd_header = *cmdbuf;
    917 			cmdbuf++;
    918 			switch (cmd_header.cmd.cmd) {
    919 			case SAVAGE_CMD_DMA_PRIM:
    920 				ret = savage_dispatch_dma_prim(
    921 					dev_priv, &cmd_header, dmabuf);
    922 				break;
    923 			case SAVAGE_CMD_VB_PRIM:
    924 				ret = savage_dispatch_vb_prim(
    925 					dev_priv, &cmd_header,
    926 					vtxbuf, vb_size, vb_stride);
    927 				break;
    928 			case SAVAGE_CMD_DMA_IDX:
    929 				j = (cmd_header.idx.count + 3) / 4;
    930 				/* j was check in savage_bci_cmdbuf */
    931 				ret = savage_dispatch_dma_idx(dev_priv,
    932 					&cmd_header, (const uint16_t *)cmdbuf,
    933 					dmabuf);
    934 				cmdbuf += j;
    935 				break;
    936 			case SAVAGE_CMD_VB_IDX:
    937 				j = (cmd_header.idx.count + 3) / 4;
    938 				/* j was check in savage_bci_cmdbuf */
    939 				ret = savage_dispatch_vb_idx(dev_priv,
    940 					&cmd_header, (const uint16_t *)cmdbuf,
    941 					(const uint32_t *)vtxbuf, vb_size,
    942 					vb_stride);
    943 				cmdbuf += j;
    944 				break;
    945 			default:
    946 				/* What's the best return code? EFAULT? */
    947 				DRM_ERROR("IMPLEMENTATION ERROR: "
    948 					  "non-drawing-command %d\n",
    949 					  cmd_header.cmd.cmd);
    950 				return -EINVAL;
    951 			}
    952 
    953 			if (ret != 0)
    954 				return ret;
    955 		}
    956 	}
    957 
    958 	return 0;
    959 }
    960 
    961 int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
    962 {
    963 	drm_savage_private_t *dev_priv = dev->dev_private;
    964 	struct drm_device_dma *dma = dev->dma;
    965 	struct drm_buf *dmabuf;
    966 	drm_savage_cmdbuf_t *cmdbuf = data;
    967 	drm_savage_cmd_header_t *kcmd_addr = NULL;
    968 	drm_savage_cmd_header_t *first_draw_cmd;
    969 	unsigned int *kvb_addr = NULL;
    970 	struct drm_clip_rect *kbox_addr = NULL;
    971 	unsigned int i, j;
    972 	int ret = 0;
    973 
    974 	DRM_DEBUG("\n");
    975 
    976 	LOCK_TEST_WITH_RETURN(dev, file_priv);
    977 
    978 	if (dma && dma->buflist) {
    979 		if (cmdbuf->dma_idx > dma->buf_count) {
    980 			DRM_ERROR
    981 			    ("vertex buffer index %u out of range (0-%u)\n",
    982 			     cmdbuf->dma_idx, dma->buf_count - 1);
    983 			return -EINVAL;
    984 		}
    985 		dmabuf = dma->buflist[cmdbuf->dma_idx];
    986 	} else {
    987 		dmabuf = NULL;
    988 	}
    989 
    990 	/* Copy the user buffers into kernel temporary areas.  This hasn't been
    991 	 * a performance loss compared to VERIFYAREA_READ/
    992 	 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
    993 	 * for locking on FreeBSD.
    994 	 */
    995 	if (cmdbuf->size) {
    996 		kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL);
    997 		if (kcmd_addr == NULL)
    998 			return -ENOMEM;
    999 
   1000 		if (copy_from_user(kcmd_addr, cmdbuf->cmd_addr,
   1001 				       cmdbuf->size * 8))
   1002 		{
   1003 			kfree(kcmd_addr);
   1004 			return -EFAULT;
   1005 		}
   1006 		cmdbuf->cmd_addr = kcmd_addr;
   1007 	}
   1008 	if (cmdbuf->vb_size) {
   1009 		kvb_addr = kmalloc(cmdbuf->vb_size, GFP_KERNEL);
   1010 		if (kvb_addr == NULL) {
   1011 			ret = -ENOMEM;
   1012 			goto done;
   1013 		}
   1014 
   1015 		if (copy_from_user(kvb_addr, cmdbuf->vb_addr,
   1016 				       cmdbuf->vb_size)) {
   1017 			ret = -EFAULT;
   1018 			goto done;
   1019 		}
   1020 		cmdbuf->vb_addr = kvb_addr;
   1021 	}
   1022 	if (cmdbuf->nbox) {
   1023 		kbox_addr = kmalloc_array(cmdbuf->nbox, sizeof(struct drm_clip_rect),
   1024 					  GFP_KERNEL);
   1025 		if (kbox_addr == NULL) {
   1026 			ret = -ENOMEM;
   1027 			goto done;
   1028 		}
   1029 
   1030 		if (copy_from_user(kbox_addr, cmdbuf->box_addr,
   1031 				       cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
   1032 			ret = -EFAULT;
   1033 			goto done;
   1034 		}
   1035 	cmdbuf->box_addr = kbox_addr;
   1036 	}
   1037 
   1038 	/* Make sure writes to DMA buffers are finished before sending
   1039 	 * DMA commands to the graphics hardware. */
   1040 	mb();
   1041 
   1042 	/* Coming from user space. Don't know if the Xserver has
   1043 	 * emitted wait commands. Assuming the worst. */
   1044 	dev_priv->waiting = 1;
   1045 
   1046 	i = 0;
   1047 	first_draw_cmd = NULL;
   1048 	while (i < cmdbuf->size) {
   1049 		drm_savage_cmd_header_t cmd_header;
   1050 		cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
   1051 		cmdbuf->cmd_addr++;
   1052 		i++;
   1053 
   1054 		/* Group drawing commands with same state to minimize
   1055 		 * iterations over clip rects. */
   1056 		j = 0;
   1057 		switch (cmd_header.cmd.cmd) {
   1058 		case SAVAGE_CMD_DMA_IDX:
   1059 		case SAVAGE_CMD_VB_IDX:
   1060 			j = (cmd_header.idx.count + 3) / 4;
   1061 			if (i + j > cmdbuf->size) {
   1062 				DRM_ERROR("indexed drawing command extends "
   1063 					  "beyond end of command buffer\n");
   1064 				DMA_FLUSH();
   1065 				ret = -EINVAL;
   1066 				goto done;
   1067 			}
   1068 			/* fall through */
   1069 		case SAVAGE_CMD_DMA_PRIM:
   1070 		case SAVAGE_CMD_VB_PRIM:
   1071 			if (!first_draw_cmd)
   1072 				first_draw_cmd = cmdbuf->cmd_addr - 1;
   1073 			cmdbuf->cmd_addr += j;
   1074 			i += j;
   1075 			break;
   1076 		default:
   1077 			if (first_draw_cmd) {
   1078 				ret = savage_dispatch_draw(
   1079 				      dev_priv, first_draw_cmd,
   1080 				      cmdbuf->cmd_addr - 1,
   1081 				      dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
   1082 				      cmdbuf->vb_stride,
   1083 				      cmdbuf->nbox, cmdbuf->box_addr);
   1084 				if (ret != 0)
   1085 					goto done;
   1086 				first_draw_cmd = NULL;
   1087 			}
   1088 		}
   1089 		if (first_draw_cmd)
   1090 			continue;
   1091 
   1092 		switch (cmd_header.cmd.cmd) {
   1093 		case SAVAGE_CMD_STATE:
   1094 			j = (cmd_header.state.count + 1) / 2;
   1095 			if (i + j > cmdbuf->size) {
   1096 				DRM_ERROR("command SAVAGE_CMD_STATE extends "
   1097 					  "beyond end of command buffer\n");
   1098 				DMA_FLUSH();
   1099 				ret = -EINVAL;
   1100 				goto done;
   1101 			}
   1102 			ret = savage_dispatch_state(dev_priv, &cmd_header,
   1103 				(const uint32_t *)cmdbuf->cmd_addr);
   1104 			cmdbuf->cmd_addr += j;
   1105 			i += j;
   1106 			break;
   1107 		case SAVAGE_CMD_CLEAR:
   1108 			if (i + 1 > cmdbuf->size) {
   1109 				DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
   1110 					  "beyond end of command buffer\n");
   1111 				DMA_FLUSH();
   1112 				ret = -EINVAL;
   1113 				goto done;
   1114 			}
   1115 			ret = savage_dispatch_clear(dev_priv, &cmd_header,
   1116 						    cmdbuf->cmd_addr,
   1117 						    cmdbuf->nbox,
   1118 						    cmdbuf->box_addr);
   1119 			cmdbuf->cmd_addr++;
   1120 			i++;
   1121 			break;
   1122 		case SAVAGE_CMD_SWAP:
   1123 			ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
   1124 						   cmdbuf->box_addr);
   1125 			break;
   1126 		default:
   1127 			DRM_ERROR("invalid command 0x%x\n",
   1128 				  cmd_header.cmd.cmd);
   1129 			DMA_FLUSH();
   1130 			ret = -EINVAL;
   1131 			goto done;
   1132 		}
   1133 
   1134 		if (ret != 0) {
   1135 			DMA_FLUSH();
   1136 			goto done;
   1137 		}
   1138 	}
   1139 
   1140 	if (first_draw_cmd) {
   1141 		ret = savage_dispatch_draw (
   1142 			dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
   1143 			cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
   1144 			cmdbuf->nbox, cmdbuf->box_addr);
   1145 		if (ret != 0) {
   1146 			DMA_FLUSH();
   1147 			goto done;
   1148 		}
   1149 	}
   1150 
   1151 	DMA_FLUSH();
   1152 
   1153 	if (dmabuf && cmdbuf->discard) {
   1154 		drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
   1155 		uint16_t event;
   1156 		event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
   1157 		SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
   1158 		savage_freelist_put(dev, dmabuf);
   1159 	}
   1160 
   1161 done:
   1162 	/* If we didn't need to allocate them, these'll be NULL */
   1163 	kfree(kcmd_addr);
   1164 	kfree(kvb_addr);
   1165 	kfree(kbox_addr);
   1166 
   1167 	return ret;
   1168 }
   1169