grf_ultms.g revision 1.6
1; $NetBSD: grf_ultms.g,v 1.6 2002/01/26 13:40:55 aymeric Exp $
2;
3; ite support for A2410.
4
5;
6; Copyright (c) 1995 Ignatios Souvatzis.
7; All rights reserved.
8;
9; Redistribution and use in source and binary forms, with or without
10; modification, are permitted provided that the following conditions
11; are met:
12; 1. Redistributions of source code must retain the above copyright
13;    notice, this list of conditions and the following disclaimer.
14; 2. Redistributions in binary form must reproduce the above copyright
15;    notice, this list of conditions and the following disclaimer in the
16;    documentation and/or other materials provided with the distribution.
17; 3. All advertising materials mentioning features or use of this software
18;    must display the following acknowledgement:
19;	This product contains software developed by Ignatios Souvatzis
20;	for the NetBSD project.
21; 4. The name of the author may not be used to endorse or promote products
22;    derived from this software without specific prior written permission
23;
24; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
35; This file contains the source code for grf_ultmscode.h. It is
36; assembler code for the TMS34010 CPU/graphics processor.
37;
38; Use Paul Mackerras' gspa assembler to transfer it to hex format, then
39; Ignatios Souvatzis' gspahextoc utility to convert it to grf_ultmscode.h.
40;
41; This has been integrated into the NetBSD/Amiga kernel build procedure.
42;
43
44; memory map:
45; FF800000 .. FF9FFFFF	overlay planes
46; FFA00000 .. FFA0FFFF	ite support code
47; FFA10000 .. FFA1FFFF	ite support, input queue
48; FFA20000 .. FFA2FEFF	variables
49; FFA2FF00 .. FFA2FFFF	variables, X server
50; FFA30000 .. FFA3FFFF	font data
51; FFA40000 .. FFA4FFFF	font data, bold
52; FFA50000 .. FFA5FFFF	X server, input queue
53; FFA60000 .. FFFFC000	X server, onboard pixmaps
54
55; Start of data area
56	.org	$FFA20000
57d:				; base of the data area; later .org directives are d-relative
58
59;
60; Ring buffer for getting stuff from host
61; Data buffer:
62inbuf	=	$FFA10000	; 64kbits here (8k bytes)
63;
64; Pointers: (these must be at address $FFA20000)
; In this file "put" is only ever read (by the main loop) and "get" is
; read once at startup and written back after each packet is consumed;
; presumably the host is the only writer of "put" -- confirm against the
; host-side driver.
65put:		.long	inbuf
66get:		.long	inbuf
67
68;
69; Mode bits for communication between GSP and CPU
70;
71; GSP mode bits: set by CPU, control GSP operation
; These are bit numbers within gsp_mode.  The code visible in this file
; tests only GSP_CALL_X; the other bits are defined but not referenced here.
72GSP_HOLD =	0
73GSP_FLUSH =	1
74GSP_ALT_SCRN =	2
75GSP_DISP_CTRL =	3
76GSP_NO_CURSOR =	4
77GSP_CALL_X =	5
78gsp_mode:	.word	0
79
80;
81; Pointer to X operation routine
; The main loop calls through this pointer whenever GSP_CALL_X is set in
; gsp_mode.
82xproc:		.long	0
83
84; We leave the next few words for future communication requirements
85
85		.org	d+0x100
86;
87; Other data:
; Screen geometry.  These defaults are loaded into the B-file registers by
; the initialization code and are overwritten by op 6.
88magic:		.blkl	1		; set => screen already inited
89MAGIC =		0xD0D0BEAC	; not referenced by the code in this file
90
; screen_width and screen_height must stay adjacent: both the init code and
; op 6 access them together as one 32-bit value at screen_width.
91screen_width:	.word	1024
92screen_height:	.word	768
93screen_origin:	.long	$FE000000	; just a placeholder
94screen_pitch:	.word	8192		; 1024*8
95pixel_size:	.word	8
97
98		.org	d+0x200
99font_adr:			; same address as bitmap_ptrs (no data in between)
100;
101; Font information is stored in the structure declared below.
102;
103bitmap_ptrs:	.long	$FFA30000	; points to first bitmap
; font_size is also read as two 16-bit words: @font_size is used as the
; width (X) and @font_size+$10 as the height (Y) by op 4.
104font_size:	.long	$00080008	; Y:X bitmap size
105under_row:	.word	6		; row # for underlines
106under_ht:	.word	1		; thickness of underline
107first_char:	.word	32		; first and last char in font
108last_char:	.word	255		;
109bold_smear:	.word	1		; for making bold fonts
110
111bgcolor:	.long	0		; background color
112fgcolor:	.long	$01010101	; foreground color
113;precomputed out of what the host gave us:
; (op 4 recomputes these three values from font_size)
114font_area:	.word	64		; in pixels
115font_pitch:	.word	8
116font_lmo:	.word	28
117
118
119; Control register addresses
120hesync	=	$c0000000
121dpyctl	=	$c0000080
122control	=	$c00000b0
123convsp	=	$c0000130
124convdp	=	$c0000140
125psize	=	$c0000150
126
127;
128; Bits in control register
; $20|$C0|$100|$200|$7C00 == $7FE0, the mask the init code clears.
129T	=	$20		; enable transparency
130W	=	$C0		; window options
131PBH	=	$100		; pixblt horiz dirn
132PBV	=	$200		; pixblt vertical dirn
133PPOP	=	$7C00		; pixel processing options (5 bits starting at bit 10)
134
135;
136; Bits in dpyctl register
137SRT	=	$800		; do serial register transfers
138
; free_memory initially holds the address of the first location after
; itself.  Nothing in this file reads or updates it.
139free_memory:	.long	free_memory_start
140free_memory_start:		; allocate dynamic arrays from here
141
142;
143; Program starts here.
144	.org	$FFA00000
145	.start	.
146
147;
148; initialization
149;
; Sets the two field sizes, the stack pointer, the overlay hardware and the
; global B-file registers, then clears the screen and falls through into the
; main loop.
; Throughout this file a trailing ",0" operand selects field 0 (16 bit) and
; ",1" selects field 1 (32 bit), as configured by the two setf lines below.
150	setf	16,0,0		; just in case
151	setf	32,0,1
152	move	$fffff000,sp
153
154; Set up sync, blank parameters
155; done by host through interface
156
157; set up overlay clut:
; Writes 0 to $fe800000, then twelve values to $fe800030: 128 three times
; followed by 0 nine times.
; NOTE(review): by analogy with op 5 this looks like "select index 0, then
; stream r,g,b triples for four entries" -- confirm against the board docs.
158	move	$0,a0
159	move	a0,@$fe800000
160	move	$fe800030,a1
161	move	128,a0
162	move	a0,*a1
163	move	a0,*a1
164	move	a0,*a1
165	move	0,a0
166	move	a0,*a1
167	move	a0,*a1
168	move	a0,*a1
169	move	a0,*a1
170	move	a0,*a1
171	move	a0,*a1
172	move	a0,*a1
173	move	a0,*a1
174	move	a0,*a1
175
176; set up overlay planes:
; Writes 6 to $fe800000 and then $0b to $fe800020.  Op 1 repeats this
; sequence with the value 1 instead of $0b.
177	move	6,a0
178	move	a0,@$fe800000
179	move	$0b,a0
180	move	a0,@$fe800020
181
182; set up global registers
183	move	@screen_pitch,b3,0	; b3 = screen pitch
184	move	@screen_origin,b4,1	; b4 = screen origin
185	move	@bgcolor,b8,1		; b8 = background color
186	lmo	b3,b0
187	move	b0,@convdp,0		; convdp = lmo(screen pitch)
188	move	@control,a0,0
189	andn	$7FE0,a0		; clear PPOP, PBV, PBH, W, T fields
190	move	a0,@control,0
191	move	@pixel_size,a0,0
192	move	a0,@psize,0
193	move	@psize,a0,0		; read back; presumably to sync the write (cf. op 6) -- confirm
194
195
196; clear the entire screen
197	move	b4,b2			; start at the screen origin
198	move	0,b9			; fill value 0
199	move	@screen_width,b7,1	; 32-bit load: screen_width and screen_height together
200	fill	l
201
2024:
203; main stuff...
; Command dispatcher.  Polls the ring buffer for packets and decodes them.
; Register use inside the loop:
;   a0 = current read position (copy of "get")
;   a1 = header word & $FFF0; added to a0 at loop_end to step over the packet
;   a2 = header word, reduced to its low 4 bits (the op number) at "continue"
;   a3 = "put" pointer, then put - a0
204	move	@get,a0,1
205	jruc	main_loop
; loop_end: every op handler returns here once its packet is done.
206loop_end:
207	clr	a4
208	move	a4,*a0,0		; zero the header word of the consumed packet
; NOTE(review): addxy presumably adds the 16-bit halves independently, so the
; pointer would wrap inside the 64-kbit buffer without a carry into the upper
; half of the address -- confirm against the instruction set reference.
209	addxy	a1,a0
210	move	a0,@get,1		; publish the new read position
211main_loop:
212	move	@gsp_mode,a1,0
213	btst	GSP_CALL_X,a1
214	jreq	main_loop_1		; GSP_CALL_X clear: skip the X hook
215
; GSP_CALL_X set: call through xproc, preserving a0-a3 around the call.
216	mmtm	sp,a0,a1,a2,a3
217	move	@xproc,a4,1
218	call	a4
219	mmfm	sp,a0,a1,a2,a3
220
221main_loop_1:
222	move	@put,a3,1
223	move	*a0,a1,0		; header word at the read position
224
225	move	a1,a2
226	andi	$FFF0,a1
227	jrz	main_loop		; upper 12 bits zero: nothing here yet, poll again
228
229	sub	a0,a3
230	jreq	main_loop		; put == read position: buffer empty
231continue:
232	andi	$F,a2			; a2 = op number
233	jrz	loop_end		; op 0: skip the packet
234	dec	a2
235	jrnz	testfor2		; not op 1: try the next handler
236; op 1 - char
; Draws one character cell, with optional inverse, bold and underline.
; Packet layout as read below (16-bit words unless noted):
;   header, char code, fg, bg, y:x position (32 bit), flags
; Flag bits: 0 = swap fg/bg, 1 = underline, 2 = use the bold font.
; NOTE(review): the meaning of the two writes to $fe800000/$fe800020 is not
; documented in this file; the init code writes 6 and $0b to the same pair.
237	movk	6,b10
238	move	b10,@$fe800000,0
239	movk	1,b10
240	move	b10,@$fe800020,0
241
242	move	a0,b10
243	move	*b10+,b12,0	; dummy move (faster than addk)
244	move	*b10+,b12,0	; char code
245	move	@first_char,b11,0
246	sub	b11,b12		; minus first char in font
247	move	@font_size,b7,1	;dydx - char size->pixel array dimensions
; No field number is given on the next three loads; presumably the assembler
; defaults to field 0 (16 bit) -- confirm against the gspa documentation.
248	move	@font_pitch,b1
249	move	@font_lmo,b0
250	move	b0,@convsp,0
251	move	@font_area,b11
252
253	mpyu	b12,b11		; times char offset
254	move	@font_adr,b0,1	; font bitmaps base
255	add	b11,b0		; character bitmap start addr. linear
256
257	move	*b10+,b8,0	; fg
258	move	*b10+,b9,0	; bg
259	move	*b10+,b2,1	; y:x
260
261	move	*b10+,b11,0	; flags
262	move	b11,a4
263	btst	0,a4
264	jreq	noinv
; flag bit 0 set: exchange b8 and b9 through b11
265	move	b8,b11
266	move	b9,b8
267	move	b11,b9
268noinv:
269	btst	2,a4
270	jreq	nobold
271	addi	$10000,b0	; bold font lives $10000 above the normal one (see op 4)
272nobold:
273	move	b2,a5		; keep y:x across the blit; presumably pixblt alters b2
274	pixblt	b,xy
275	move	a5,b2
276
277	btst	1,a4
278	jreq	noul
; flag bit 1 set: fill a strip under_ht rows high starting at row under_row
279	move	@under_row,b11,0
280	sll	16,b11		; shift into Y half
281	add	b11,b2
282	move	@under_ht,b11,0
283	sll	16,b11		; shift into Y half
284	movy	b11,b7		; and move Y half only
285	fill	xy
286noul:
287	jruc	loop_end
288testfor2:
289	dec	a2
290	jrnz	testfor3
291; op 2 - fill
; Fills a rectangle using a caller-supplied 5-bit value for the control
; register field at bit 10 (the PPOP field, mask $7C00).
; Packet layout as read below:
;   header, color (16 bit), XY start (32 bit), dydx (32 bit), field value
292	move	a0,b10
293	move	*b10+,b9,0	; dummy move
294	move	*b10+,b9,0	; color
295	move	*b10+,b2,1	; XY start address
296	move	*b10+,b7,1	; dydx
297
298	move	@control,b0,0
299	move	b0,*-sp		; save the current control value
300	move	*b10+,b0
301	setf	5,0,0		; temporarily make field 0 five bits wide
302	move	b0,@control+10	; store 5 bits at bit offset 10 of control
303	setf	16,0,0		; back to 16-bit field 0
304	move	@control,b0,0	; read back; presumably to sync the write (cf. op 6) -- confirm
305
306	fill	xy
307
308	move	*sp+,b0
309	move	b0,@control,0	; restore the saved control value
310	jruc	loop_end,l
311
312testfor3:
313	dec	a2
314	jrnz	testfor4
315; op 3 - pixblt
; Copies a rectangle on screen.  The PBV/PBH direction bits in the control
; register are chosen from the cmpxy comparison of source and destination,
; presumably so that overlapping copies are not corrupted -- confirm.
; Packet layout as read below:
;   header, XY src (32 bit), dydx (32 bit), XY dst (32 bit)
316	move	a0,b10
317	move	@convdp,@convsp,0	; source is the screen too: same conversion value
318	move	*b10+,b0,0	; dummy move
319	move	*b10+,b0,1	; XY src
320	move	*b10+,b7,1	; dydx
321	move	*b10+,b2,1	; XY dst
322	move	b3,b1		; source pitch = screen pitch
323	move	@control,b11,0
324	andni	PBH|PBV,b11	; start with both direction bits clear
325	cmpxy	b0,b2
326	jrc	yok
327	ori	PBV,b11
; NOTE(review): the jrv below is reached either directly from jrc or after
; the ori, so it relies on ori leaving the V flag from cmpxy intact -- confirm.
328yok:	jrv	xok
329	ori	PBH,b11
330xok:	move	b11,@control,0
331	move	@control,b11,0	; read back; presumably to sync the write -- confirm
332
333	pixblt	xy,xy
334	jruc	loop_end,l
335
336testfor4:
337	dec	a2
338	jrnz	testfor5
339
340; op 4 - mirror the font and precompute some values.
; Steps:
;   1. derive font_pitch (8 or 16), font_lmo and font_area from font_size
;   2. bit-reverse every 16-bit word of the font in place
;   3. write a smeared (bold) copy of each word at font_adr + $10000
;   4. advance font_adr by (pitch - width)
; The packet carries no arguments; all inputs come from the font structure.
341
342	move	@font_size,a5,0		; a5 = font width
343	movk	8,a6
344	cmp	a6,a5
345	jrle	t4b8			; width <= 8: pitch stays 8
346	movi	16, a6			; otherwise pitch is 16
347t4b8:	move	a6,@font_pitch,0
; NOTE(review): lmo is taken of the width (a5), not of the pitch just stored.
; The default data (pitch 8, lmo 28) suggests font_lmo is meant to match the
; pitch; the two results agree only for widths of exactly 8 or 16 -- confirm.
348	lmo	a5,a6
349	move	a6,@font_lmo,0
350	move	@font_size+$10,a6,0	; a6 = font height
351	move	@font_pitch,a5,0
352	mpyu	a6,a5
353	move	a5,@font_area,0		; font_area = pitch * height
354
355	move	@last_char,a6,0
356	move	@first_char,a5,0
357	sub	a5,a6
358	addk	1,a6			; a6 = number of characters
359	move	@font_size+$10,a5,0
360	mpyu	a6,a5			; a5 = characters * height = total rows
361	move	@font_size,a7,0
362	cmpi	8,a7
; NOTE(review): three things to confirm in the next six lines:
;  - an immediate move sits between cmpi and jrgt; if that move alters the
;    condition codes, the branch no longer reflects the compare;
;  - the "yes" comment is on the fall-through path, i.e. the path taken when
;    the font is NOT wider than 8 pixels;
;  - the fall-through path (two rows per word) ends with mask $7fff while the
;    wide path keeps $7f7f, which looks swapped.
363	move	$7f7f,a12	; mask for bold smearing
364	jrgt	t4bf		; wider than 8 pixels?
365	addk	1,a5		; yes, the words are only half the # of rows
366	srl	1,a5		; a5 = (rows + 1) / 2 words
367	move	$7fff,a12	; mask for bold smearing changes, too
368t4bf:	move	@font_adr,a6,1	; a6 = pointer into the normal font
369	move	a6,a9
370	addi	$10000,a9 ; start address of bold font
371	move	@bold_smear,a10	; a10 = smear iterations per word
372
373; fortunately, this loop fits into 3 of the 4 cache segments:
374; execution time: about 32 periods per word of font.
; Loop registers: a5 = words remaining, a6 = normal font pointer,
; a9 = bold font pointer, a7 = source word, a8 = mirrored result.
375
376mirlp:	move	*a6,a7
377	clr	a8
378
; Sixteen srl/addc pairs: each shifts the low bit of a7 out into carry and
; shifts it into a8 from the right, so a8 ends up as a7 bit-reversed over
; 16 bits.
379	srl	1,a7
380	addc	a8,a8
381	srl	1,a7
382	addc	a8,a8
383	srl	1,a7
384	addc	a8,a8
385	srl	1,a7
386	addc	a8,a8
387
388	srl	1,a7
389	addc	a8,a8
390	srl	1,a7
391	addc	a8,a8
392	srl	1,a7
393	addc	a8,a8
394	srl	1,a7
395	addc	a8,a8
396
397	srl	1,a7
398	addc	a8,a8
399	srl	1,a7
400	addc	a8,a8
401	srl	1,a7
402	addc	a8,a8
403	srl	1,a7
404	addc	a8,a8
405
406	srl	1,a7
407	addc	a8,a8
408	srl	1,a7
409	addc	a8,a8
410	srl	1,a7
411	addc	a8,a8
412	srl	1,a7
413	addc	a8,a8
414
415	move	a8,*a6+		; store the mirrored word back in place
416	move	a8,a7
417	move	a10,a11
; Smear: OR the word with masked copies of itself shifted left one bit,
; bold_smear times.  The mask clears the top bit(s) before each shift.
418smearlp:
419	and	a12,a7
420	sll	1,a7
421	or	a7,a8
422	dsj	a11,smearlp
423	move	a8,*a9+		; store the bold word
424
425	dsj	a5,mirlp
426;; support odd-sized fonts. pitch must still be 8 or 16
; font_adr += (font_pitch - font width)
427	move	@font_size,a5,0
428	move	@font_pitch,a6,0
429	sub	a5,a6
430	move	@font_adr,a5,1
431	add	a5,a6
432	move	a6,@font_adr,1
433;;
434	jruc	loop_end,l
435
436
437testfor5:
438	dec	a2
439	jrne	testfor6
440; loadclut --- load clut entry.
441;	1==overlay index red green blue
442;	for speed reasons, the host will load the image clut directly rather
443;	than through us, but it's not that expensive to support both here
444;	just in case
; Packet layout as read below (16-bit words):
;   header, overlay flag, index, then three data words (red, green, blue
;   per the line above).
; The index goes to $fe800000; the three data words go to $fe800030 when
; the overlay flag is nonzero and to $fe800010 when it is zero.
445	move	a0,a4
446	addk	$10,a4		; skip the header word
447	move	$fe800030,a6
448	move	*a4+,a5,0	; overlay flag; the following jrne tests it
449	jrne	t5l1
450	subk	$20,a6		; flag == 0: use $fe800010 instead
451t5l1:	move	*a4+,a5,0	; index
452	move	a5,@$fe800000,0
453	move	*a4+,a5,0
454	move	a5,*a6,0
455	move	*a4+,a5,0
456	move	a5,*a6,0
457	move	*a4+,a5,0
458	move	a5,*a6,0
459	jruc	loop_end,l
460
461testfor6:
462	dec	a2
463	jrne	testfor7
464
465; op 6: load new framebuffer size and position for ite support.
; Updates both the B-file registers and the stored screen variables.
; Packet layout as read below:
;   header, size (32 bit), origin (32 bit), pitch (16 bit), pixel size (16 bit)
466	move	a0,b10
467	addk	$10,b10		; skip the header word
468	move	*b10+,b7,1
469	move	b7,@screen_width,1	; 32-bit store: covers screen_width and screen_height
470	move	*b10+,b4,1
471	move	b4,@screen_origin,1
472	move	*b10+,b3,0
473	move	b3,@screen_pitch,0
474	lmo	b3,b0
475	move	b0,@convdp,0	; convdp = lmo(pitch), as in the init code
476	move	*b10,b0,0
477	move	b0,@psize,0
478	move	b0,@pixel_size,0	; this syncs the psize write, too
479
480	jruc	loop_end,l
481
482testfor7:
; Any op number above 6 lands here and is ignored; loop_end still steps over
; the packet.
483	jruc	loop_end,l
484;;;
485