grf_ultms.g revision 1.8
1; $NetBSD: grf_ultms.g,v 1.8 2003/07/16 19:58:59 is Exp $
2;
3; ite support for A2410.
4
5;
6; Copyright (c) 1995 Ignatios Souvatzis.
7; All rights reserved.
8;
9; Redistribution and use in source and binary forms, with or without
10; modification, are permitted provided that the following conditions
11; are met:
12; 1. Redistributions of source code must retain the above copyright
13;    notice, this list of conditions and the following disclaimer.
14; 2. Redistributions in binary form must reproduce the above copyright
15;    notice, this list of conditions and the following disclaimer in the
16;    documentation and/or other materials provided with the distribution.
17; 3. All advertising materials mentioning features or use of this software
18;    must display the following acknowledgement:
19;	This product contains software developed by Ignatios Souvatzis
20;	for the NetBSD project.
21; 4. The name of the author may not be used to endorse or promote products
22;    derived from this software without specific prior written permission
23;
24; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
35; This file contains the source code for grf_ultms.c. It is assembler
36; code for the TMS34010 CPU/graphics processor, as understood by the
37; in-tree version of Paul Mackerras' "gspa" assembler.
38;
39; Use 'make grf_ultms.c' to generate the .c file.
40
41; memory map:
42; FF800000 .. FF9FFFFF	overlay planes
43; FFA00000 .. FFA0FFFF	ite support code
44; FFA10000 .. FFA1FFFF	ite support, input queue
45; FFA20000 .. FFA2FEFF	variables
46; FFA2FF00 .. FFA2FFFF	variables, X server
47; FFA30000 .. FFA3FFFF	font data
48; FFA40000 .. FFA4FFFF	font data, bold
49; FFA50000 .. FFA5FFFF	X server, input queue
50; FFA60000 .. FFFFC000	X server, onboard pixmaps
51
52; Start of data area
; Host communication area.  `d' marks the base of the variable area; the
; later ".org d+0x100" / ".org d+0x200" directives are relative to it.
53	.org	$FFA20000
54d:
55
56;
57; Ring buffer for getting stuff from host
58; Data buffer:
59inbuf	=	$FFA10000	; 64kbits here (8k bytes)
60;
61; Pointers: (these must be at address $FFA20000)
; main_loop reads both as 32-bit values.  `get' is written back at loop_end
; after each record; `put' is only read in the code visible here
; (presumably it is advanced by the host -- confirm against the host driver).
62put:		.long	inbuf
63get:		.long	inbuf
64
65;
66; Mode bits for communication between GSP and CPU
67;
68; GSP mode bits: set by CPU, control GSP operation
; These are bit numbers (used with btst), not masks.  Only GSP_CALL_X is
; tested in the code visible here.
69GSP_HOLD =	0
70GSP_FLUSH =	1
71GSP_ALT_SCRN =	2
72GSP_DISP_CTRL =	3
73GSP_NO_CURSOR =	4
74GSP_CALL_X =	5
75gsp_mode:	.word	0
76
77;
78; Pointer to X operation routine
; main_loop calls through this pointer whenever GSP_CALL_X is set in gsp_mode.
79xproc:		.long	0
80
81; We leave the next few words for future communication requirements
82
; Screen geometry.  The initial values are overwritten by op 6.
83		.org	d+0x100
84;
85; Other data:
; magic/MAGIC are not referenced by the code visible here.
86magic:		.blkl	1		; set => screen already inited
87MAGIC =		0xD0D0BEAC
88
; screen_width and screen_height must stay adjacent: the init code and op 6
; access them together as one 32-bit field at @screen_width.
89screen_width:	.word	1024
90screen_height:	.word	768
91screen_origin:	.long	$FE000000	; just a placeholder
92screen_pitch:	.word	8192		; 1024*8
93pixel_size:	.word	8
94
95		.org	d+0x200
96font_adr:
97;
98; Font information is stored in the structure declared below.
; font_adr is the same address as bitmap_ptrs; op 1 reads it as the base of
; the glyph bitmaps and op 4 adjusts it for fonts narrower than their pitch.
99;
100bitmap_ptrs:	.long	$FFA30000	; points to first bitmap
; Read as a 32-bit dydx value by op 1; op 4 reads the low word (@font_size)
; as the glyph width and the high word (@font_size+$10) as the glyph height.
101font_size:	.long	$00080008	; Y:X bitmap size
102under_row:	.word	6		; row # for underlines
103under_ht:	.word	1		; thickness of underline
104first_char:	.word	32		; first and last char in font
105last_char:	.word	255		;
106bold_smear:	.word	1		; for making bold fonts
107
; bgcolor is loaded into b8 during initialization; fgcolor is not referenced
; by the code visible here.
108bgcolor:	.long	0		; background color
109fgcolor:	.long	$01010101	; foreground color
110;precomputed out of what the host gave us:
; (op 4 recomputes all three; the values below match the default 8x8 font)
111font_area:	.word	64		; in pixels
112font_pitch:	.word	8
113font_lmo:	.word	28
114
115
116; Control register addresses
; hesync and dpyctl are not accessed by the code visible here.
117hesync	=	$c0000000
118dpyctl	=	$c0000080
119control	=	$c00000b0
120convsp	=	$c0000130
121convdp	=	$c0000140
122psize	=	$c0000150
123
124;
125; Bits in control register
; T|W|PBH|PBV|PPOP = $7FE0, the mask cleared by the initialization code.
; PPOP occupies bits 10..14, which is why op 2 writes a 5-bit field at
; control+10.
126T	=	$20		; enable transparency
127W	=	$C0		; window options
128PBH	=	$100		; pixblt horiz dirn
129PBV	=	$200		; pixblt vertical dirn
130PPOP	=	$7C00		; pixel processing options
131
132;
133; Bits in dpyctl register
134SRT	=	$800		; do serial register transfers
135
; free_memory initially points at the first location after the variables.
; Neither label is referenced by the code visible here.
136free_memory:	.long	free_memory_start
137free_memory_start:		; allocate dynamic arrays from here
138
139;
140; Program starts here.
; Entry point.  Establishes the two field sizes that the rest of the file
; relies on: a trailing ",0" on a move selects field 0 (16 bits) and ",1"
; selects field 1 (32 bits); moves without a suffix use field 0.
141	.org	$FFA00000
142	.start	.
143
144;
145; initialization
146;
147	setf	16,0,0		; just in case
148	setf	32,0,1
149	move	$fffff000,sp
150
151; Set up sync, blank parameters
152; done by host through interface
153
154; set up overlay clut:
; $fe800000 receives an index and $fe800030 receives colour data; op 5 uses
; the same pair for overlay entries (index, then red, green, blue).
; One index write (0) is followed by twelve data writes, i.e. four RGB
; triples: (128,128,128) and then three times (0,0,0).
; NOTE(review): this presumably relies on the index advancing by itself after
; each triple -- confirm against the RAMDAC data sheet.
155	move	$0,a0
156	move	a0,@$fe800000
157	move	$fe800030,a1
158	move	128,a0
159	move	a0,*a1
160	move	a0,*a1
161	move	a0,*a1
162	move	0,a0
163	move	a0,*a1
164	move	a0,*a1
165	move	a0,*a1
166	move	a0,*a1
167	move	a0,*a1
168	move	a0,*a1
169	move	a0,*a1
170	move	a0,*a1
171	move	a0,*a1
172
173; set up overlay planes:
; Writes index 6 and then the value $0b to $fe800020.  Op 1 writes the value
; 1 to the same index before drawing a character.
; NOTE(review): the meaning of register 6 and of these values is not visible
; in this file -- confirm against the RAMDAC data sheet.
174	move	6,a0
175	move	a0,@$fe800000
176	move	$0b,a0
177	move	a0,@$fe800020
178
179; set up global registers
; b3 (destination pitch) and b4 (offset) are loaded once here and again only
; by op 6; the op handlers rely on them staying intact.  The B-file register
; roles named in this file's comments follow the TMS34010 graphics
; instruction conventions (b0 source address, b1 source pitch, b2 destination
; address, b7 dydx, b8/b9 colours).
180	move	@screen_pitch,b3,0
181	move	@screen_origin,b4,1
182	move	@bgcolor,b8,1
183	lmo	b3,b0
184	move	b0,@convdp,0	; conversion factor derived from the pitch
185	move	@control,a0,0
186	andn	$7FE0,a0		; clear PPOP, PBV, PBH, W, T fields
187	move	a0,@control,0
188	move	@pixel_size,a0,0
189	move	a0,@psize,0
; Read back psize; the value is not used.  Presumably this forces the
; register write to complete (op 6 has a comment to that effect) -- confirm.
190	move	@psize,a0,0
191
192
193; clear the entire screen
; Fills from the screen origin (b2 = b4) with colour 0 (b9); b7 is loaded
; with the 32-bit height:width pair stored at screen_width/screen_height.
194	move	b4,b2
195	move	0,b9
196	move	@screen_width,b7,1
197	fill	l
198
; Command dispatcher.
;
; a0 holds the current read position in the input ring buffer for the whole
; lifetime of the loop.  Each record starts with a 16-bit header word:
;   bits 0..3   opcode (0 = no operation, 1..6 handled below)
;   bits 4..15  (header & $FFF0) amount by which the read position is advanced
;               once the record has been processed
; A header whose upper 12 bits are zero means "nothing here yet"; the loop
; also idles while put == get.
;
; Handlers must leave a0 and a1 intact and finish by jumping to loop_end.
1994:
200; main stuff...
201	move	@get,a0,1
202	jruc	main_loop
203loop_end:
; Retire the record just processed: zero its header so the slot reads as
; empty, then advance.  addxy adds the two 16-bit halves independently, so
; the low half of a0 wraps without carrying into the high half and the
; pointer stays inside the 64-kbit buffer at inbuf.
204	clr	a4
205	move	a4,*a0,0
206	addxy	a1,a0
207	move	a0,@get,1
208main_loop:
209	move	@gsp_mode,a1,0
210	btst	GSP_CALL_X,a1
211	jreq	main_loop_1
212
; GSP_CALL_X is set: call the routine whose address is stored in xproc,
; preserving a0-a3 across the call.
213	mmtm	sp,a0,a1,a2,a3
214	move	@xproc,a4,1
215	call	a4
216	mmfm	sp,a0,a1,a2,a3
217
218main_loop_1:
219	move	@put,a3,1
220	move	*a0,a1,0	; header word of the next record
221
222	move	a1,a2	; a2 keeps the full header for opcode extraction
223	andi	$FFF0,a1	; a1 = advance amount
224	jrz	main_loop	; zero => record not written yet
225
226	sub	a0,a3
227	jreq	main_loop	; put == get => buffer empty
228continue:
229	andi	$F,a2	; a2 = opcode
230	jrz	loop_end	; opcode 0: just skip the record
; Opcode dispatch is a chain of "dec a2 / branch if not zero" tests: each
; handler is entered when a2 reaches zero.
231	dec	a2
232	jrnz	testfor2
233; op 1 - char
; Record layout after the header word (all read through b10):
;   char code (16), fg (16), bg (16), y:x destination (32), flags (16)
; Flags: bit 0 = swap fg and bg, bit 1 = underline, bit 2 = use the bold font.
;
; Write the value 1 at index 6 of the device at $fe800000/$fe800020 (the
; initialization code writes $0b there).
; NOTE(review): purpose not visible in this file -- confirm.
234	movk	6,b10
235	move	b10,@$fe800000,0
236	movk	1,b10
237	move	b10,@$fe800020,0
238
239	move	a0,b10
240	move	*b10+,b12,0	; dummy move (faster than addk)
241	move	*b10+,b12,0	; char code
242	move	@first_char,b11,0
243	sub	b11,b12		; minus first char in font
244	move	@font_size,b7,1	;dydx - char size->pixel array dimensions
245	move	@font_pitch,b1
246	move	@font_lmo,b0
247	move	b0,@convsp,0
248	move	@font_area,b11
249
250	mpyu	b12,b11		; times char offset
251	move	@font_adr,b0,1	; font bitmaps base
252	add	b11,b0		; character bitmap start addr. linear
253
254	move	*b10+,b8,0	; fg
255	move	*b10+,b9,0	; bg
256	move	*b10+,b2,1	; y:x
257
258	move	*b10+,b11,0	; flags
259	move	b11,a4
260	btst	0,a4
261	jreq	noinv
; flag bit 0 set: exchange b8 and b9, using b11 as scratch
262	move	b8,b11
263	move	b9,b8
264	move	b11,b9
265noinv:
266	btst	2,a4
267	jreq	nobold
; flag bit 2 set: use the bold copy that op 4 builds $10000 above the
; regular font
268	addi	$10000,b0
269nobold:
; b2 is saved in a5 and restored after the pixblt so that the underline
; below starts from the original character position.
270	move	b2,a5
271	pixblt	b,xy
272	move	a5,b2
273
274	btst	1,a4
275	jreq	noul
; flag bit 1 set: fill a rectangle under_ht rows high, starting under_row
; rows below the top of the cell.  Only the Y half of b7 is replaced, so the
; width is still the glyph width from font_size.
276	move	@under_row,b11,0
277	sll	16,b11		; shift into Y half
278	add	b11,b2
279	move	@under_ht,b11,0
280	sll	16,b11		; shift into Y half
281	movy	b11,b7		; and move Y half only
282	fill	xy
283noul:
284	jruc	loop_end
285testfor2:
286	dec	a2
287	jrnz	testfor3
288; op 2 - fill
; Record layout after the header word:
;   color (16), XY start (32), dydx (32), pixel processing option (16)
; The current control register is pushed, its PPOP field is replaced for the
; duration of the fill, and the saved value is restored afterwards.
289	move	a0,b10
290	move	*b10+,b9,0	; dummy move
291	move	*b10+,b9,0	; color
292	move	*b10+,b2,1	; XY start address
293	move	*b10+,b7,1	; dydx
294
295	move	@control,b0,0
296	move	b0,*-sp
297	move	*b10+,b0	; requested PPOP value (read while field 0 is still 16 bits)
; Temporarily shrink field 0 to 5 bits so that the store below touches only
; bits 10..14 of control (the PPOP field), then restore it to 16 bits.
298	setf	5,0,0
299	move	b0,@control+10
300	setf	16,0,0
; Read control back; the value is overwritten by the pop below.
; NOTE(review): presumably a write-synchronizing read, as for psize -- confirm.
301	move	@control,b0,0
302
303	fill	xy
304
305	move	*sp+,b0
306	move	b0,@control,0
307	jruc	loop_end,l
308
309testfor3:
310	dec	a2
311	jrnz	testfor4
312; op 3 - pixblt
; Screen-to-screen copy.  Record layout after the header word:
;   XY src (32), dydx (32), XY dst (32)
; Source and destination share the same pitch (b1 = b3) and conversion
; factor (convsp = convdp).
313	move	a0,b10
314	move	@convdp,@convsp,0
315	move	*b10+,b0,0	; dummy move
316	move	*b10+,b0,1	; XY src
317	move	*b10+,b7,1	; dydx
318	move	*b10+,b2,1	; XY dst
319	move	b3,b1
; Choose the copy direction from a comparison of the source and destination
; corners: PBV and PBH are first cleared, then each is set again unless the
; corresponding flag from cmpxy is set (carry for PBV, overflow for PBH).
; Presumably this keeps overlapping copies correct -- confirm against the
; CMPXY flag definitions.
320	move	@control,b11,0
321	andni	PBH|PBV,b11
322	cmpxy	b0,b2
323	jrc	yok
324	ori	PBV,b11
325yok:	jrv	xok
326	ori	PBH,b11
327xok:	move	b11,@control,0
; Read control back; the value is not used afterwards.
328	move	@control,b11,0
329
; Unlike op 2, the modified control value is not restored after the blit.
330	pixblt	xy,xy
331	jruc	loop_end,l
332
333testfor4:
334	dec	a2
335	jrnz	testfor5
336
337; op 4 - mirror the font and precompute some values.
;
; Takes no arguments from the record; works on the font descriptor at
; font_adr.  It
;   1. derives font_pitch, font_lmo and font_area from font_size,
;   2. reverses the bit order of every 16-bit word of font data in place,
;   3. writes a smeared ("bold") copy of each word $10000 above the original,
;   4. advances font_adr by (pitch - width).
; Steps 2 and 4 modify their own inputs, so running this op twice on the same
; font data does not give the same result as running it once.
338
; pitch = 8 if the glyph width is at most 8, otherwise 16
339	move	@font_size,a5,0
340	movk	8,a6
341	cmp	a6,a5
342	jrle	t4b8
343	movi	16, a6
344t4b8:	move	a6,@font_pitch,0
; NOTE(review): lmo is taken of the glyph width (a5), not of the pitch just
; stored from a6.  The two agree for widths 8 and 16 but not for other
; widths -- confirm which was intended.
345	lmo	a5,a6
346	move	a6,@font_lmo,0
; font_area = pitch * height
347	move	@font_size+$10,a6,0
348	move	@font_pitch,a5,0
349	mpyu	a6,a5
350	move	a5,@font_area,0
351
; a5 = total number of glyph rows = (last_char - first_char + 1) * height
352	move	@last_char,a6,0
353	move	@first_char,a5,0
354	sub	a5,a6
355	addk	1,a6
356	move	@font_size+$10,a5,0
357	mpyu	a6,a5
358	move	@font_size,a7,0
359	cmpi	8,a7
; NOTE(review): an immediate move sits between cmpi and jrgt.  If it is
; assembled as MOVI and MOVI updates N/Z/V, jrgt tests the constant instead
; of the comparison -- confirm against the TMS34010 instruction reference.
; NOTE(review): $7f7f keeps the smear from crossing the byte boundary inside
; a word, which would suit the two-rows-per-word case, yet it is the value
; kept on the wider-than-8 path; the two masks look swapped -- confirm.
360	move	$7f7f,a12	; mask for bold smearing
361	jrgt	t4bf		; wider than 8 pixels? then one row per word, keep the count
362	addk	1,a5		; no: two rows share each word, so only half the # of rows (rounded up)
363	srl	1,a5
364	move	$7fff,a12	; mask for bold smearing changes, too
365t4bf:	move	@font_adr,a6,1
366	move	a6,a9
367	addi	$10000,a9 ; start address of bold font
; a10 = smear count, reloaded into a11 for every word.
; NOTE(review): dsj decrements before testing, so a bold_smear of 0 (or a
; word count of 0 in a5) would wrap around instead of skipping the loop.
368	move	@bold_smear,a10
369
370; fortunately, this loop fits into 3 of the 4 cache segments:
371; execution time: about 32 periods per word of font.
372
; Per word: a7 = original word, a8 = bit-reversed result.  Each srl/addc pair
; shifts the lowest bit of a7 out into carry and shifts it into the bottom of
; a8; sixteen pairs reverse the whole word.
373mirlp:	move	*a6,a7
374	clr	a8
375
376	srl	1,a7
377	addc	a8,a8
378	srl	1,a7
379	addc	a8,a8
380	srl	1,a7
381	addc	a8,a8
382	srl	1,a7
383	addc	a8,a8
384
385	srl	1,a7
386	addc	a8,a8
387	srl	1,a7
388	addc	a8,a8
389	srl	1,a7
390	addc	a8,a8
391	srl	1,a7
392	addc	a8,a8
393
394	srl	1,a7
395	addc	a8,a8
396	srl	1,a7
397	addc	a8,a8
398	srl	1,a7
399	addc	a8,a8
400	srl	1,a7
401	addc	a8,a8
402
403	srl	1,a7
404	addc	a8,a8
405	srl	1,a7
406	addc	a8,a8
407	srl	1,a7
408	addc	a8,a8
409	srl	1,a7
410	addc	a8,a8
411
412	move	a8,*a6+	; store the mirrored word back in place
413	move	a8,a7
414	move	a10,a11
; Bold: OR the word with copies of itself shifted up by one bit, a11 times.
; The mask in a12 drops the top bit(s) before each shift.
415smearlp:
416	and	a12,a7
417	sll	1,a7
418	or	a7,a8
419	dsj	a11,smearlp
420	move	a8,*a9+	; store the smeared word in the bold font
421
422	dsj	a5,mirlp
423;; support odd-sized fonts. pitch must still be 8 or 16
; font_adr += pitch - width
424	move	@font_size,a5,0
425	move	@font_pitch,a6,0
426	sub	a5,a6
427	move	@font_adr,a5,1
428	add	a5,a6
429	move	a6,@font_adr,1
430;;
431	jruc	loop_end,l
432
433
434testfor5:
435	dec	a2
436	jrne	testfor6
437; loadclut --- load clut entry.
438;	1==overlay index red green blue
439;	for speed reasons, the host will load the image clut directly rather
440;	than through us, but it's not that expensive to support both here
441;	just in case
; Record layout after the header word, five 16-bit values:
;   overlay flag, index, red, green, blue
; The index goes to $fe800000 and the three colour values to a data port:
; $fe800030 when the overlay flag is non-zero, $fe800010 when it is zero.
442	move	a0,a4
443	addk	$10,a4	; skip the header word
444	move	$fe800030,a6
445	move	*a4+,a5,0	; overlay flag; the branch below tests this load
446	jrne	t5l1
447	subk	$20,a6	; flag == 0: use the port at $fe800010 instead
448t5l1:	move	*a4+,a5,0
449	move	a5,@$fe800000,0	; index
450	move	*a4+,a5,0
451	move	a5,*a6,0	; red
452	move	*a4+,a5,0
453	move	a5,*a6,0	; green
454	move	*a4+,a5,0
455	move	a5,*a6,0	; blue
456	jruc	loop_end,l
457
458testfor6:
459	dec	a2
460	jrne	testfor7
461
462; op 6: load new framebuffer size and position for ite support.
; Record layout after the header word:
;   height:width (32), screen origin (32), screen pitch (16), pixel size (16)
; Updates both the variables in the data area and the live registers
; (b3, b4, convdp, psize) that the initialization code set up.
463	move	a0,b10
464	addk	$10,b10	; skip the header word
465	move	*b10+,b7,1
466	move	b7,@screen_width,1	; 32-bit store: also overwrites screen_height
467	move	*b10+,b4,1
468	move	b4,@screen_origin,1
469	move	*b10+,b3,0
470	move	b3,@screen_pitch,0
471	lmo	b3,b0
472	move	b0,@convdp,0	; conversion factor derived from the new pitch
473	move	*b10,b0,0
474	move	b0,@psize,0
475	move	b0,@pixel_size,0	; this syncs the psize write, too
476
477	jruc	loop_end,l
478
; Opcodes 7..15 are not implemented: the record is skipped like a no-op.
479testfor7:
480	jruc	loop_end,l
481;;;
482