grf_ultms.g revision 1.8
1; $NetBSD: grf_ultms.g,v 1.8 2003/07/16 19:58:59 is Exp $ 2; 3; ite support for A2410. 4 5; 6; Copyright (c) 1995 Ignatios Souvatzis. 7; All rights reserved. 8; 9; Redistribution and use in source and binary forms, with or without 10; modification, are permitted provided that the following conditions 11; are met: 12; 1. Redistributions of source code must retain the above copyright 13; notice, this list of conditions and the following disclaimer. 14; 2. Redistributions in binary form must reproduce the above copyright 15; notice, this list of conditions and the following disclaimer in the 16; documentation and/or other materials provided with the distribution. 17; 3. All advertising materials mentioning features or use of this software 18; must display the following acknowledgement: 19; This product contains software developed by Ignatios Souvatzis 20; for the NetBSD project. 21; 4. The name of the author may not be used to endorse or promote products 22; derived from this software without specific prior written permission 23; 24; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 29; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 30; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 31; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 33; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 35; This file contains the source code for grf_ultms.c. It is assembler 36; code for the TMS34010 CPU/graphics processor, as understood by the 37; in-tree version of Paul Mackerras' "gspa" assembler. 
;
; Use 'make grf_ultms.c' to generate the .c file.

; memory map:
; FF800000 .. FF9FFFFF	overlay planes
; FFA00000 .. FFA0FFFF	ite support code
; FFA10000 .. FFA1FFFF	ite support, input queue
; FFA20000 .. FFA2FEFF	variables
; FFA2FF00 .. FFA2FFFF	variables, X server
; FFA30000 .. FFA3FFFF	font data
; FFA40000 .. FFA4FFFF	font data, bold
; FFA50000 .. FFA5FFFF	X server, input queue
; FFA60000 .. FFFFC000	X server, onboard pixmaps

; Start of data area.  "d" is the base that the .org directives below
; are expressed against.
	.org	$FFA20000
d:

;
; Ring buffer for getting stuff from host
; Data buffer:
inbuf = $FFA10000	; 64kbits here (8k bytes)
;
; Pointers: (these must be at address $FFA20000)
; Both start out at the beginning of the buffer.  The main loop reads
; "put" and is the only writer of "get" in this file.
put:	.long	inbuf
get:	.long	inbuf

;
; Mode bits for communication between GSP and CPU
;
; GSP mode bits: set by CPU, control GSP operation
; (bit numbers within gsp_mode; this file only tests GSP_CALL_X)
GSP_HOLD = 0
GSP_FLUSH = 1
GSP_ALT_SCRN = 2
GSP_DISP_CTRL = 3
GSP_NO_CURSOR = 4
GSP_CALL_X = 5
gsp_mode:	.word	0

;
; Pointer to X operation routine
; (called from the main loop while GSP_CALL_X is set in gsp_mode)
xproc:	.long	0

; We leave the next few words for future communication requirements

	.org	d+0x100
;
; Other data:
magic:	.blkl	1		; set => screen already inited
MAGIC = 0xD0D0BEAC

; Screen geometry.  screen_width and screen_height are adjacent on
; purpose: the code loads and stores them together as one 32-bit value.
screen_width:	.word	1024
screen_height:	.word	768
screen_origin:	.long	$FE000000	; just a placeholder
screen_pitch:	.word	8192		; 1024*8
pixel_size:	.word	8

	.org	d+0x200
font_adr:
;
; Font information is stored in the structure declared below.
;
; The first field shares its address with the font_adr label above.
bitmap_ptrs:	.long	$FFA30000	; points to first bitmap
font_size:	.long	$00080008	; Y:X bitmap size
under_row:	.word	6		; row # for underlines
under_ht:	.word	1		; thickness of underline
first_char:	.word	32		; first and last char in font
last_char:	.word	255		;
bold_smear:	.word	1		; for making bold fonts

bgcolor:	.long	0		; background color
fgcolor:	.long	$01010101	; foreground color
;precomputed out of what the host gave us:
; (the "mirror the font" op rewrites these three; the "char" op reads them)
font_area:	.word	64		; in pixels
font_pitch:	.word	8
font_lmo:	.word	28


; Control register addresses
hesync	= $c0000000
dpyctl	= $c0000080
control	= $c00000b0
convsp	= $c0000130
convdp	= $c0000140
psize	= $c0000150

;
; Bits in control register
T	= $20		; enable transparency
W	= $C0		; window options
PBH	= $100		; pixblt horiz dirn
PBV	= $200		; pixblt vertical dirn
PPOP	= $7C00		; pixel processing options

;
; Bits in dpyctl register
SRT	= $800		; do serial register transfers

; free_memory holds the address of the first byte after the variables.
free_memory:	.long	free_memory_start
free_memory_start:	; allocate dynamic arrays from here

;
; Program starts here.
	.org	$FFA00000
	.start	.
;
; initialization
;
; Runs once from the entry point and falls through into the command
; loop.  Leaves b3 = screen pitch, b4 = screen origin and b8 = bgcolor
; loaded, and CONVDP matching the pitch.
;
	setf	16,0,0		; just in case
	setf	32,0,1
	move	$fffff000,sp

; Set up sync, blank parameters
;	done by host through interface

; set up overlay clut:
; $fe800000 takes the entry index and $fe800030 the overlay colour data
; (the loadclut op below uses the same two addresses).  Three words of
; 128 are written first, then nine words of 0.
	move	$0,a0
	move	a0,@$fe800000
	move	$fe800030,a1
	move	128,a0
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	0,a0
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1
	move	a0,*a1

; set up overlay planes:
; NOTE(review): index 6 / data port $fe800020 is presumably a control
; register of the colour lookup chip -- confirm against the board docs.
	move	6,a0
	move	a0,@$fe800000
	move	$0b,a0
	move	a0,@$fe800020

; set up global registers
	move	@screen_pitch,b3,0
	move	@screen_origin,b4,1
	move	@bgcolor,b8,1
	lmo	b3,b0			; CONVDP follows the screen pitch
	move	b0,@convdp,0
	move	@control,a0,0
	andn	$7FE0,a0		; clear PPOP, PBV, PBH, W, T fields
					; ($7FE0 = PPOP|PBV|PBH|W|T)
	move	a0,@control,0
	move	@pixel_size,a0,0
	move	a0,@psize,0
	move	@psize,a0,0		; read back what was just written


; clear the entire screen
; The 32-bit load from screen_width also picks up screen_height, which
; is the next word, so b7 holds both dimensions for the fill.
	move	b4,b2
	move	0,b9
	move	@screen_width,b7,1
	fill	l

4:
; main stuff...
;
; Command dispatcher.
;
; a0 = read position in the input ring (a bit address, kept in "get").
; Each command starts with a 16-bit word:
;   bits 0..3   operation number, tested by the dec/jrnz chain below
;   bits 4..15  amount to advance a0 by once the command is done
; a1 carries the advance amount until loop_end, so the operations must
; leave a0 and a1 alone.
;
	move	@get,a0,1
	jruc	main_loop

; Every operation ends up here: zero the command word that was just
; handled, step over the command and publish the new read position.
; addxy adds the two 16-bit halves separately, so only the low half of
; a0 changes and the pointer stays inside the 64kbit buffer.
loop_end:
	clr	a4
	move	a4,*a0,0
	addxy	a1,a0
	move	a0,@get,1
main_loop:
	move	@gsp_mode,a1,0
	btst	GSP_CALL_X,a1
	jreq	main_loop_1

; GSP_CALL_X is set: call the routine xproc points at, with our working
; registers saved around the call.
	mmtm	sp,a0,a1,a2,a3
	move	@xproc,a4,1
	call	a4
	mmfm	sp,a0,a1,a2,a3

main_loop_1:
	move	@put,a3,1
	move	*a0,a1,0		; command word at the read position

	move	a1,a2			; a2 keeps the operation number
	andi	$FFF0,a1		; a1 keeps the advance amount
	jrz	main_loop		; none yet: poll again

	sub	a0,a3
	jreq	main_loop		; put == get: poll again
continue:
	andi	$F,a2
	jrz	loop_end		; operation 0: just skip it
	dec	a2
	jrnz	testfor2
; op 1 - char
; Command words after the header: char code, fg, bg, y:x (32 bits),
; flags.  Flag bit 0 swaps the two colours, bit 1 underlines, bit 2
; selects the bold font.
; NOTE(review): same index/data pair as "set up overlay planes" in the
; init code, but with the value 1 -- meaning not visible from this file.
	movk	6,b10
	move	b10,@$fe800000,0
	movk	1,b10
	move	b10,@$fe800020,0

	move	a0,b10
	move	*b10+,b12,0		; dummy move (faster than addk)
	move	*b10+,b12,0		; char code
	move	@first_char,b11,0
	sub	b11,b12			; minus first char in font
	move	@font_size,b7,1		;dydx - char size->pixel array dimensions
	move	@font_pitch,b1
	move	@font_lmo,b0
	move	b0,@convsp,0
	move	@font_area,b11

	mpyu	b12,b11			; times char offset
	move	@font_adr,b0,1		; font bitmaps base
	add	b11,b0			; character bitmap start addr. linear

	move	*b10+,b8,0		; fg
	move	*b10+,b9,0		; bg
	move	*b10+,b2,1		; y:x

	move	*b10+,b11,0		; flags
	move	b11,a4			; keep them in a4, b11 is reused below
	btst	0,a4
	jreq	ch_noswap
	move	b8,b11			; exchange b8 and b9 through b11
	move	b9,b8
	move	b11,b9
ch_noswap:
	btst	2,a4
	jreq	ch_nobold
	addi	$10000,b0		; same glyph in the bold copy of the font
ch_nobold:
	move	b2,a5			; the position is needed again for the underline
	pixblt	b,xy
	move	a5,b2

	btst	1,a4
	jreq	ch_nounder
	move	@under_row,b11,0
	sll	16,b11			; shift into Y half
	add	b11,b2
	move	@under_ht,b11,0
	sll	16,b11			; shift into Y half
	movy	b11,b7			; and move Y half only
	fill	xy
ch_nounder:
	jruc	loop_end
testfor2:
	dec	a2
	jrnz	testfor3
; op 2 - fill
; Command words after the header: color, XY start (32 bits), dydx
; (32 bits), and a value for the 5-bit field at bit 10 of the control
; register (the PPOP bits).  The control register is saved on the stack
; and put back after the fill.
	move	a0,b10
	move	*b10+,b9,0		; dummy move
	move	*b10+,b9,0		; color
	move	*b10+,b2,1		; XY start address
	move	*b10+,b7,1		; dydx

	move	@control,b0,0
	move	b0,*-sp
	move	*b10+,b0
	setf	5,0,0			; field 0 is 5 bits wide for one store
	move	b0,@control+10
	setf	16,0,0
	move	@control,b0,0		; read the register back

	fill	xy

	move	*sp+,b0
	move	b0,@control,0
	jruc	loop_end,l

testfor3:
	dec	a2
	jrnz	testfor4
; op 3 - pixblt
; Command words after the header: XY src, dxdy, XY dst (32 bits each).
; Source and destination both use the screen pitch (b3).  PBV and PBH
; are chosen from the comparison of the two corner positions.
	move	a0,b10
	move	@convdp,@convsp,0
	move	*b10+,b0,0		; dummy move
	move	*b10+,b0,1		; XY src
	move	*b10+,b7,1		; dxdy
	move	*b10+,b2,1		; XY dst
	move	b3,b1
	move	@control,b11,0
	andni	PBH|PBV,b11
	cmpxy	b0,b2
	jrc	blt_ydone
	ori	PBV,b11
blt_ydone:
	jrv	blt_xdone		; still the V flag left by cmpxy
	ori	PBH,b11
blt_xdone:
	move	b11,@control,0
	move	@control,b11,0		; read the register back

	pixblt	xy,xy
	jruc	loop_end,l

testfor4:
	dec	a2
	jrnz	testfor5

; op 4 - mirror the font and precompute some values.
;
; Body of op 4.  Inputs are the font fields the host filled in:
; font_adr, font_size, first_char, last_char and bold_smear.
;
; Step 1 - values the "char" op picks up later:
;   font_pitch = 8 if the glyph width is <= 8, else 16
;   font_lmo   = lmo(font_pitch); the char op copies it to CONVSP
;   font_area  = font_pitch * glyph height
;
	move	@font_size,a5,0		; a5 = glyph width (X half of font_size)
	movk	8,a6
	cmp	a6,a5
	jrle	t4b8			; width <= 8: pitch stays 8
	movi	16, a6
t4b8:	move	a6,@font_pitch,0
	lmo	a6,a5			; of the pitch, not of the width: the two
					; differ for widths 4..7 and 9..15
	move	a5,@font_lmo,0
	move	@font_size+$10,a6,0	; a6 = glyph height (Y half of font_size)
	move	@font_pitch,a5,0
	mpyu	a6,a5
	move	a5,@font_area,0

;
; Step 2 - loop parameters:
;   a5  = number of 16-bit words to process
;   a12 = mask applied before every smear shift
; A font up to 8 pixels wide packs two rows into each word, so it needs
; half as many words (rounded up), and bit 7 must not be smeared into
; bit 8, which belongs to the other row.
;
	move	@last_char,a6,0
	move	@first_char,a5,0
	sub	a5,a6
	addk	1,a6			; a6 = number of glyphs
	move	@font_size+$10,a5,0
	mpyu	a6,a5			; a5 = total number of rows
	move	@font_size,a7,0
	move	$7fff,a12		; mask for bold smearing, one row per word
	cmpi	8,a7			; keep the compare right in front of the
					; branch: a move of an immediate changes
					; the flags jrgt looks at
	jrgt	t4bf			; wider than 8 pixels?
	addk	1,a5			; no, the words are only half the # of rows
	srl	1,a5
	move	$7f7f,a12		; mask for bold smearing changes, too
t4bf:	move	@font_adr,a6,1
	move	a6,a9
	addi	$10000,a9		; start address of bold font
	move	@bold_smear,a10

; fortunately, this loop fits into 3 of the 4 cache segments:
; execution time: about 32 periods per word of font.
;
; Each pass reads one word at a6, reverses its 16 bits (srl moves the
; low bit into carry, addc shifts it into a8 from the right) and stores
; the result back in place.  It then ORs in bold_smear copies shifted
; left by one more bit each and stores that word at a9.
; NOTE(review): bold_smear == 0 or an empty font would make the dsj
; counters wrap -- the host is assumed never to send that.

mirlp:	move	*a6,a7
	clr	a8

	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8

	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8

	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8

	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8
	srl	1,a7
	addc	a8,a8

	move	a8,*a6+			; mirrored word replaces the original
	move	a8,a7
	move	a10,a11
smearlp:
	and	a12,a7
	sll	1,a7
	or	a7,a8
	dsj	a11,smearlp
	move	a8,*a9+			; smeared word goes to the bold font

	dsj	a5,mirlp
;; support odd-sized fonts. pitch must still be 8 or 16
;; font_adr is moved forward by (pitch - width) bits.
	move	@font_size,a5,0
	move	@font_pitch,a6,0
	sub	a5,a6
	move	@font_adr,a5,1
	add	a5,a6
	move	a6,@font_adr,1
;;
	jruc	loop_end,l


testfor5:
	dec	a2
	jrne	testfor6
; loadclut --- load clut entry.
; Command words after the header:
;	1==overlay index red green blue
; for speed reasons, the host will load the image clut directly rather
; than through us, but it's not that expensive to support both here
; just in case
;
; a6 is the data port the three colour words go to: $fe800030 when the
; first word is non-zero, $fe800010 when it is zero.
	move	a0,a4
	addk	16,a4			; step over the command word
	move	$fe800030,a6
	move	*a4+,a5,0		; overlay flag; the load sets the Z flag
	jrne	t5_port
	subk	$20,a6
t5_port:
	move	*a4+,a5,0		; index
	move	a5,@$fe800000,0
	move	*a4+,a5,0		; red
	move	a5,*a6,0
	move	*a4+,a5,0		; green
	move	a5,*a6,0
	move	*a4+,a5,0		; blue
	move	a5,*a6,0
	jruc	loop_end,l

testfor6:
	dec	a2
	jrne	testfor7

; op 6: load new framebuffer size and position for ite support.
; Command words after the header: width and height (one 32-bit value),
; origin (32 bits), pitch, pixel size.  The values go both into the
; variables and into the registers the drawing ops rely on (b3, b4,
; CONVDP, PSIZE).
	move	a0,b10
	addk	16,b10			; step over the command word
	move	*b10+,b7,1
	move	b7,@screen_width,1	; stores screen_height as well
	move	*b10+,b4,1
	move	b4,@screen_origin,1
	move	*b10+,b3,0
	move	b3,@screen_pitch,0
	lmo	b3,b0
	move	b0,@convdp,0
	move	*b10,b0,0
	move	b0,@psize,0
	move	b0,@pixel_size,0	; this syncs the psize write, too

	jruc	loop_end,l

; Any other operation number is skipped without doing anything.
testfor7:
	jruc	loop_end,l
;;;