1 #ifndef _TSENG_ACCEL_H 2 #define _TSENG_ACCEL_H 3 /* 4 * Shortcuts to Tseng memory-mapped accelerator-control registers 5 */ 6 7 #define MMU_CONTROL(x) MMIO_OUT8(pTseng->MMioBase, 0x13<<0, x) 8 #define ACL_SUSPEND_TERMINATE(x) MMIO_OUT8(pTseng->MMioBase, 0x30<<0, x) 9 #define ACL_OPERATION_STATE(x) MMIO_OUT8(pTseng->MMioBase, 0x31<<0, x) 10 11 #define ACL_SYNC_ENABLE(x) MMIO_OUT8(pTseng->MMioBase, 0x32<<0, x) 12 /* for ET6000, ACL_SYNC_ENABLE becomes ACL_6K_CONFIG */ 13 14 #define ACL_INTERRUPT_STATUS(x) \ 15 MMIO_OUT8(pTseng->MMioBase, 0x35<<0, x) 16 #define ACL_INTERRUPT_MASK(x) MMIO_OUT8(pTseng->MMioBase, 0x34<<0, x) 17 #define ACL_ACCELERATOR_STATUS (0x36 << 0) 18 #define ACL_ACCELERATOR_STATUS_SET(x) \ 19 MMIO_OUT8(pTseng->MMioBase, ACL_ACCELERATOR_STATUS, x) 20 #define ACL_WRITE_INTERFACE_VALID (0x33 << 0) 21 22 /* and this is only for the ET6000 */ 23 #define ACL_POWER_CONTROL(x) MMIO_OUT8(pTseng->MMioBase, 0x37<<0, x) 24 25 /* non-queued for w32p's and ET6000 */ 26 #define ACL_NQ_X_POSITION(x) MMIO_OUT16(pTseng->MMioBase, 0x38<<0, x) 27 #define ACL_NQ_Y_POSITION(x) MMIO_OUT16(pTseng->MMioBase, 0x3A<<0, x) 28 /* queued for w32 and w32i */ 29 #define ACL_X_POSITION(x) MMIO_OUT16(pTseng->MMioBase, 0x94<<0, x) 30 #define ACL_Y_POSITION(x) MMIO_OUT16(pTseng->MMioBase, 0x96<<0, x) 31 32 #define ACL_PATTERN_ADDRESS(x) MMIO_OUT32(pTseng->MMioBase, 0x80<<0, x) 33 #define ACL_SOURCE_ADDRESS(x) MMIO_OUT32(pTseng->MMioBase, 0x84<<0, x) 34 35 #define ACL_PATTERN_Y_OFFSET(x) MMIO_OUT16(pTseng->MMioBase, 0x88<<0, x) 36 #define ACL_PATTERN_Y_OFFSET32(x) MMIO_OUT32(pTseng->MMioBase, 0x88<<0, x) 37 #define ACL_SOURCE_Y_OFFSET(x) MMIO_OUT16(pTseng->MMioBase, 0x8A<<0, x) 38 #define ACL_DESTINATION_Y_OFFSET(x) MMIO_OUT16(pTseng->MMioBase, 0x8C<<0, x) 39 40 /* W32i */ 41 #define ACL_VIRTUAL_BUS_SIZE(x) MMIO_OUT8(pTseng->MMioBase, 0x8E<<0, x) 42 /* w32p */ 43 #define ACL_PIXEL_DEPTH(x) MMIO_OUT8(pTseng->MMioBase, 0x8E<<0, x) 44 45 /* w32 and w32i */ 46 #define ACL_XY_DIRECTION(x) MMIO_OUT8(pTseng->MMioBase, 0x8F<<0, x) 47 48 #define ACL_PATTERN_WRAP(x) MMIO_OUT8(pTseng->MMioBase, 0x90<<0, x) 49 #define ACL_PATTERN_WRAP32(x) MMIO_OUT32(pTseng->MMioBase, 0x90<<0, x) 50 #define ACL_TRANSFER_DISABLE(x) MMIO_OUT8(pTseng->MMioBase, 0x91<<0, x) /* ET6000 only */ 51 #define ACL_SOURCE_WRAP(x) MMIO_OUT8(pTseng->MMioBase, 0x92<<0, x) 52 53 #define ACL_X_COUNT(x) MMIO_OUT16(pTseng->MMioBase, 0x98<<0, x) 54 #define ACL_Y_COUNT(x) MMIO_OUT16(pTseng->MMioBase, 0x9A<<0, x) 55 /* shortcut. not a real register */ 56 #define ACL_XY_COUNT(x) MMIO_OUT32(pTseng->MMioBase, 0x98<<0, x) 57 58 #define ACL_ROUTING_CONTROL(x) MMIO_OUT8(pTseng->MMioBase, 0x9C<<0, x) 59 /* for ET6000, ACL_ROUTING_CONTROL becomes ACL_MIX_CONTROL */ 60 #define ACL_RELOAD_CONTROL(x) MMIO_OUT8(pTseng->MMioBase, 0x9D<<0, x) 61 /* for ET6000, ACL_RELOAD_CONTROL becomes ACL_STEPPING_INHIBIT */ 62 63 #define ACL_BACKGROUND_RASTER_OPERATION(x) MMIO_OUT8(pTseng->MMioBase, 0x9E<<0, x) 64 #define ACL_FOREGROUND_RASTER_OPERATION(x) MMIO_OUT8(pTseng->MMioBase, 0x9F<<0, x) 65 66 #define ACL_DESTINATION_ADDRESS(x) MMIO_OUT32(pTseng->MMioBase, 0xA0<<0, x) 67 68 /* the following is for the w32p's only */ 69 #define ACL_MIX_ADDRESS(x) MMIO_OUT32(pTseng->MMioBase, 0xA4<<0, x) 70 71 #define ACL_MIX_Y_OFFSET(x) MMIO_OUT16(pTseng->MMioBase, 0xA8<<0, x) 72 #define ACL_ERROR_TERM(x) MMIO_OUT16(pTseng->MMioBase, 0xAA<<0, x) 73 #define ACL_DELTA_MINOR(x) MMIO_OUT16(pTseng->MMioBase, 0xAC<<0, x) 74 #define ACL_DELTA_MINOR32(x) MMIO_OUT32(pTseng->MMioBase, 0xAC<<0, x) 75 #define ACL_DELTA_MAJOR(x) MMIO_OUT16(pTseng->MMioBase, 0xAE<<0, x) 76 77 /* ET6000 only (trapezoids) */ 78 #define ACL_SECONDARY_EDGE(x) MMIO_OUT8(pTseng->MMioBase, 0x93<<0, x) 79 #define ACL_SECONDARY_ERROR_TERM(x) MMIO_OUT16(pTseng->MMioBase, 0xB2<<0, x) 80 #define ACL_SECONDARY_DELTA_MINOR(x) MMIO_OUT16(pTseng->MMioBase, 0xB4<<0, x) 81 #define ACL_SECONDARY_DELTA_MINOR32(x) MMIO_OUT32(pTseng->MMioBase, 0xB4<<0, x) 82 #define ACL_SECONDARY_DELTA_MAJOR(x) MMIO_OUT16(pTseng->MMioBase, 0xB6<<0, x) 83 84 /* for ET6000: */ 85 #define ACL_6K_CONFIG ACL_SYNC_ENABLE 86 87 /* for ET6000: */ 88 #define ACL_MIX_CONTROL ACL_ROUTING_CONTROL 89 #define ACL_STEPPING_INHIBIT ACL_RELOAD_CONTROL 90 91 /* 92 * Some shortcuts. 93 */ 94 95 #define MAX_WAIT_CNT 500000 /* how long we wait before we time out */ 96 #undef WAIT_VERBOSE /* if defined: print out how long we waited */ 97 98 void tseng_recover_timeout(TsengPtr pTseng); 99 100 static __inline__ void 101 tseng_wait(TsengPtr pTseng, int reg, char *name, unsigned char mask) 102 { 103 int cnt = MAX_WAIT_CNT; 104 105 while ((MMIO_IN32(pTseng->MMioBase,reg)) & mask) 106 if (--cnt < 0) { 107 ErrorF("WAIT_%s: timeout.\n", name); 108 tseng_recover_timeout(pTseng); 109 break; 110 } 111 #ifdef WAIT_VERBOSE 112 ErrorF("%s%d ", name, MAX_WAIT_CNT - cnt); 113 #endif 114 } 115 116 #define WAIT_QUEUE tseng_wait(pTseng, ACL_ACCELERATOR_STATUS, "QUEUE", 0x1) 117 118 /* This is only for W32p rev b...d */ 119 #define WAIT_INTERFACE tseng_wait(pTseng, ACL_WRITE_INTERFACE_VALID, "INTERFACE", 0xf) 120 121 #define WAIT_ACL tseng_wait(pTseng, ACL_ACCELERATOR_STATUS, "ACL", 0x2) 122 123 #define WAIT_XY tseng_wait(pTseng, ACL_ACCELERATOR_STATUS, "XY", 0x4) 124 125 #define SET_FUNCTION_BLT \ 126 if (pTseng->ChipType == ET6000) \ 127 ACL_MIX_CONTROL(0x33); \ 128 else \ 129 ACL_ROUTING_CONTROL(0x00); 130 131 #define SET_FUNCTION_BLT_TR \ 132 ACL_MIX_CONTROL(0x13); 133 134 #define FBADDR(pTseng, x,y) ( (y) * pTseng->line_width + MULBPP(pTseng, x) ) 135 136 #define SET_FG_ROP(rop) \ 137 ACL_FOREGROUND_RASTER_OPERATION(W32OpTable[rop]); 138 139 #define SET_FG_ROP_PLANEMASK(rop) \ 140 ACL_FOREGROUND_RASTER_OPERATION(W32OpTable_planemask[rop]); 141 142 #define SET_BG_ROP(rop) \ 143 ACL_BACKGROUND_RASTER_OPERATION(W32PatternOpTable[rop]); 144 145 #define SET_BG_ROP_TR(rop, bg_color) \ 146 if ((bg_color) == -1) /* transparent color expansion */ \ 147 ACL_BACKGROUND_RASTER_OPERATION(0xaa); \ 148 else \ 149 ACL_BACKGROUND_RASTER_OPERATION(W32PatternOpTable[rop]); 150 151 #define SET_DELTA(Min, Maj) \ 152 ACL_DELTA_MINOR32(((Maj) << 16) + (Min)) 153 154 #define SET_SECONDARY_DELTA(Min, Maj) \ 155 ACL_SECONDARY_DELTA_MINOR(((Maj) << 16) + (Min)) 156 157 #ifdef NO_OPTIMIZE 158 #define SET_XYDIR(dir) \ 159 ACL_XY_DIRECTION(dir); 160 #else 161 /* 162 * only changing ACL_XY_DIRECTION when it needs to be changed avoids 163 * unnecessary PCI bus writes, which are slow. This shows up very well 164 * on consecutive small fills. 165 */ 166 #define SET_XYDIR(dir) \ 167 if ((dir) != pTseng->tseng_old_dir) \ 168 pTseng->tseng_old_dir = (dir); \ 169 ACL_XY_DIRECTION(pTseng->tseng_old_dir); 170 #endif 171 172 #define SET_SECONDARY_XYDIR(dir) \ 173 ACL_SECONDARY_EDGE(dir); 174 175 /* Must do 0x09 (in one operation) for the W32 */ 176 #define START_ACL(pTseng, dst) \ 177 ACL_DESTINATION_ADDRESS(dst); 178 179 /* START_ACL for the ET6000 */ 180 #define START_ACL_6(dst) \ 181 ACL_DESTINATION_ADDRESS(dst); 182 183 #define START_ACL_CPU(pTseng, dst) \ 184 ACL_DESTINATION_ADDRESS(dst); 185 186 /* ACL_DESTINATION_ADDRESS(dst); should be enough for START_ACL_CPU */ 187 188 /* 189 * Some commonly used inline functions and utility functions. 190 */ 191 192 static __inline__ int 193 COLOR_REPLICATE_DWORD(TsengPtr pTseng, int color) 194 { 195 switch (pTseng->Bytesperpixel) { 196 case 1: 197 color &= 0xFF; 198 color = (color << 8) | color; 199 color = (color << 16) | color; 200 break; 201 case 2: 202 color &= 0xFFFF; 203 color = (color << 16) | color; 204 break; 205 } 206 return color; 207 } 208 209 /* 210 * Optimizing note: increasing the wrap size for fixed-color source/pattern 211 * tiles from 4x1 (as below) to anything bigger doesn't seem to affect 212 * performance (it might have been better for larger wraps, but it isn't). 213 */ 214 215 static __inline__ void 216 SET_FG_COLOR(TsengPtr pTseng, int color) 217 { 218 ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 219 ACL_SOURCE_Y_OFFSET(3); 220 color = COLOR_REPLICATE_DWORD(pTseng, color); 221 MMIO_OUT32(pTseng->scratchMemBase, pTseng->tsengFg, color); 222 223 ACL_SOURCE_WRAP(0x02); 224 } 225 226 static __inline__ void 227 SET_BG_COLOR(TsengPtr pTseng, int color) 228 { 229 ACL_PATTERN_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengPat); 230 ACL_PATTERN_Y_OFFSET(3); 231 color = COLOR_REPLICATE_DWORD(pTseng, color); 232 MMIO_OUT32(pTseng->scratchMemBase,pTseng->tsengPat, color); 233 234 ACL_PATTERN_WRAP(0x02); 235 } 236 237 /* 238 * this does the same as SET_FG_COLOR and SET_BG_COLOR together, but is 239 * faster, because it allows the PCI chipset to chain the requests into a 240 * burst sequence. The order of the commands is partly linear. 241 * So far for the theory... 242 */ 243 static __inline__ void 244 SET_FG_BG_COLOR(TsengPtr pTseng, int fgcolor, int bgcolor) 245 { 246 ACL_PATTERN_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengPat); 247 ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 248 ACL_PATTERN_Y_OFFSET32(0x00030003); 249 fgcolor = COLOR_REPLICATE_DWORD(pTseng, fgcolor); 250 bgcolor = COLOR_REPLICATE_DWORD(pTseng, bgcolor); 251 MMIO_OUT32(pTseng->scratchMemBase,pTseng->tsengFg, fgcolor); 252 MMIO_OUT32(pTseng->scratchMemBase,pTseng->tsengPat, bgcolor); 253 254 ACL_PATTERN_WRAP32(0x00020002); 255 } 256 257 /* 258 * Real 32-bit multiplications are horribly slow compared to 16-bit (on i386). 259 */ 260 #ifdef NO_OPTIMIZE 261 static __inline__ int 262 MULBPP(TsengPtr pTseng, int x) 263 { 264 return (x * pTseng->Bytesperpixel); 265 } 266 #else 267 static __inline__ int 268 MULBPP(TsengPtr pTseng, int x) 269 { 270 int result = x << pTseng->powerPerPixel; 271 272 if (pTseng->Bytesperpixel != 3) 273 return result; 274 else 275 return result + x; 276 } 277 #endif 278 279 static __inline__ int 280 CALC_XY(TsengPtr pTseng, int x, int y) 281 { 282 int new_x, xy; 283 284 if ((pTseng->old_y == y) && (pTseng->old_x == x)) 285 return -1; 286 287 if (pTseng->ChipType == ET4000) 288 new_x = MULBPP(pTseng, x - 1); 289 else 290 new_x = MULBPP(pTseng, x) - 1; 291 xy = ((y - 1) << 16) + new_x; 292 pTseng->old_x = x; 293 pTseng->old_y = y; 294 return xy; 295 } 296 297 /* generic SET_XY */ 298 static __inline__ void 299 SET_XY(TsengPtr pTseng, int x, int y) 300 { 301 int new_x; 302 303 if (pTseng->ChipType == ET4000) 304 new_x = MULBPP(pTseng, x - 1); 305 else 306 new_x = MULBPP(pTseng, x) - 1; 307 ACL_XY_COUNT(((y - 1) << 16) + new_x); 308 pTseng->old_x = x; 309 pTseng->old_y = y; 310 } 311 312 static __inline__ void 313 SET_X_YRAW(TsengPtr pTseng, int x, int y) 314 { 315 int new_x; 316 317 if (pTseng->ChipType == ET4000) 318 new_x = MULBPP(pTseng, x - 1); 319 else 320 new_x = MULBPP(pTseng, x) - 1; 321 ACL_XY_COUNT((y << 16) + new_x); 322 pTseng->old_x = x; 323 pTseng->old_y = y - 1; /* old_y is invalid (raw transfer) */ 324 } 325 326 /* 327 * This is plain and simple "benchmark rigging". 328 * (no real application does lots of subsequent same-size blits) 329 * 330 * The effect of this is amazingly good on e.g large blits: 400x400 331 * rectangle fill in 24 and 32 bpp on ET6000 jumps from 276 MB/sec to up to 332 * 490 MB/sec... But not always. There must be a good reason why this gives 333 * such a boost, but I don't know it. 334 */ 335 336 static __inline__ void 337 SET_XY_4(TsengPtr pTseng, int x, int y) 338 { 339 int new_xy; 340 341 if ((pTseng->old_y != y) || (pTseng->old_x != x)) { 342 new_xy = ((y - 1) << 16) + MULBPP(pTseng, x - 1); 343 ACL_XY_COUNT(new_xy); 344 pTseng->old_x = x; 345 pTseng->old_y = y; 346 } 347 } 348 349 static __inline__ void 350 SET_XY_6(TsengPtr pTseng, int x, int y) 351 { 352 int new_xy; /* using this intermediate variable is faster */ 353 354 if ((pTseng->old_y != y) || (pTseng->old_x != x)) { 355 new_xy = ((y - 1) << 16) + MULBPP(pTseng, x) - 1; 356 ACL_XY_COUNT(new_xy); 357 pTseng->old_x = x; 358 pTseng->old_y = y; 359 } 360 } 361 362 /* generic SET_XY_RAW */ 363 static __inline__ void 364 SET_XY_RAW(TsengPtr pTseng,int x, int y) 365 { 366 ACL_XY_COUNT((y << 16) + x); 367 pTseng->old_x = pTseng->old_y = -1; /* invalidate old_x/old_y (raw transfers) */ 368 } 369 370 static __inline__ void 371 PINGPONG(TsengPtr pTseng) 372 { 373 if (pTseng->tsengFg == 0) { 374 pTseng->tsengFg = 8; 375 pTseng->tsengBg = 24; 376 pTseng->tsengPat = 40; 377 } else { 378 pTseng->tsengFg = 0; 379 pTseng->tsengBg = 16; 380 pTseng->tsengPat = 32; 381 } 382 } 383 384 /* 385 * This is called in each ACL function just before the first ACL register is 386 * written to. It waits for the accelerator to finish on cards that don't 387 * support hardware-wait-state locking, and waits for a free queue entry on 388 * others, if hardware-wait-states are not enabled. 389 */ 390 static __inline__ void 391 wait_acl_queue(TsengPtr pTseng) 392 { 393 if (pTseng->UsePCIRetry) 394 WAIT_QUEUE; 395 if (pTseng->need_wait_acl) 396 WAIT_ACL; 397 } 398 #endif /* _TSENG_ACCEL_H */ 399