omrasops.c revision 1.27 1 /* $NetBSD: omrasops.c,v 1.27 2024/09/20 03:24:05 isaki Exp $ */
2
3 /*-
4 * Copyright (c) 2000 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Tohru Nishimura.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
33
34 __KERNEL_RCSID(0, "$NetBSD: omrasops.c,v 1.27 2024/09/20 03:24:05 isaki Exp $");
35
36 /*
37 * Designed specifically for 'm68k bitorder';
38 * - most significant byte is stored at lower address,
39 * - most significant bit is displayed at left most on screen.
40 * Implementation relies on;
41 * - first column is at 32bit aligned address,
42 * - font glyphs are stored in 32bit padded.
43 */
44 /*
45 * BMSEL affects both of
46 * 1) which plane a write to the common bitmap plane is reflected on and
47 * 2) which plane's ROP a write to the common ROP is reflected on.
48 *
49 * The common ROP is not a ROP applied to write to the common bitmap plane.
50 * It's equivalent to set ROPs of the plane selected in the plane mask one
51 * by one.
52 */
53
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/device.h>
57
58 #include <dev/wscons/wsconsio.h>
59 #include <dev/wscons/wsdisplayvar.h>
60 #include <dev/rasops/rasops.h>
61
62 #include <arch/luna68k/dev/omrasopsvar.h>
63
/* Enable the m68k inline assembly paths when luna68k is defined. */
#ifdef luna68k
#define USE_M68K_ASM	1
#endif

/*
 * ASSUME(cond): provide an optimization condition to the compiler.
 *
 * Every variant expands to exactly one statement/expression so that
 * ASSUME() can be used as the body of an unbraced if/else without
 * capturing a following "else".
 *
 * Note that clang defines __GNUC__ as well, so it normally takes the
 * first branch.
 */
#if defined(__GNUC__)
#define ASSUME(cond)	do { if (!(cond)) __unreachable(); } while (0)
#elif defined(__clang__) && __has_builtin(__builtin_assume)
#define ASSUME(cond)	__builtin_assume(cond)
#else
#define ASSUME(cond)	((void)(cond))
#endif
76
/*
 * Per-row attribute summary.
 *
 * The byte fields are overlaid with "all" so that a whole attribute can be
 * compared with a single 32bit comparison.
 * fg == bg (with ismulti cleared) represents the 'reset' or 'erased' state.
 *
 * XXX it should be redesigned, including making the attributes support 8bpp
 */
typedef struct {
	union {
		int32_t all;		/* every field below at once */
		struct {
			int8_t ismulti;	/* is multi color used */
			uint8_t fg;
			uint8_t bg;
			uint8_t reserved;
		};
	};
} rowattr_t;

/* Comparing via .all is only valid while the fields exactly fill it. */
_Static_assert(sizeof(rowattr_t) == sizeof(int32_t),
    "rowattr_t fields must exactly overlay rowattr_t.all");
89
90 /* wscons emulator operations */
91 static void om_cursor(void *, int, int, int);
92 static int om_mapchar(void *, int, u_int *);
93 static void om_putchar(void *, int, int, u_int, long);
94 static void om1_copycols(void *, int, int, int, int);
95 static void om4_copycols(void *, int, int, int, int);
96 static void om1_copyrows(void *, int, int, int num);
97 static void om4_copyrows(void *, int, int, int num);
98 static void om_erasecols(void *, int, int, int, long);
99 static void om_eraserows(void *, int, int, long);
100 static int om_allocattr(void *, int, int, int, long *);
101
102 static void om_fill(int, int, uint8_t *, int, int, uint32_t, int, int);
103 static void om_fill_color(int, int, uint8_t *, int, int, int, int);
104 static void om_rascopy_single(int, uint8_t *, uint8_t *, int16_t, int16_t,
105 uint8_t[]);
106 static void om4_rascopy_multi(uint8_t *, uint8_t *, int16_t, int16_t);
107 static void om_unpack_attr(long, uint8_t *, uint8_t *, int *);
108
109 static int omrasops_init(struct rasops_info *, int, int);
110
/*
 * XXX should be fixed...
 * This number of elements is derived from howmany(1024, fontheight = 24).
 * But it is currently initialized with row = 34, so it is used only up to 34.
 */
#define OMRASOPS_MAX_ROWS 43
/* Attribute summary of each text row, indexed by row number. */
static rowattr_t rowattr[OMRASOPS_MAX_ROWS];

#define ALL1BITS (~0U)		/* mask with every bit set */
#define ALL0BITS (0U)		/* mask with every bit cleared */
#define BLITWIDTH (32)		/* pixels handled by one longword transfer */
#define ALIGNMASK (0x1f)	/* bit offset within a longword */
#define BYTESDONE (4)		/* bytes advanced by one longword transfer */

#if 0 /* XXX not used yet */
/*
 * internal attributes. see om_allocattr().
 */
#define OMFB_ATTR_MULTICOLOR (1U << 31)
#define OMFB_ATTR_UNDERLINE (1U << 17)
#define OMFB_ATTR_BOLD (1U << 16)
#endif

/*
 * XXX deprecated.
 * This way cannot be extended to 8bpp, so don't use it in new code.
 *
 * Pn(addr) converts addr into a longword pointer that is (n + 1) times
 * OMFB_PLANEOFFS bytes above it.
 */
#define P0(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 1))
#define P1(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 2))
#define P2(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 3))
#define P3(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 4))
142
/*
 * macros to handle unaligned bit copy ops.
 * See src/sys/dev/rasops/rasops_masks.h for MI version.
 * Also refer src/sys/arch/hp300/dev/maskbits.h.
 * (which was implemented for ancient src/sys/arch/hp300/dev/grf_hy.c)
 *
 * GETBITS(psrc, x, w, dst): dst = the w bits found at bit offset x from psrc,
 *	right-aligned.
 * PUTBITS(src, x, w, pdst): store the low w bits of src at bit offset x
 *	from pdst.
 *
 * Bit offset 0 is the most significant bit of the byte at the given address.
 */

#if USE_M68K_ASM || defined(__m68k__) || defined(__mc68000__)

/* luna68k version GETBITS() that gets w bits from bit x at psrc memory */
#define FASTGETBITS(psrc, x, w, dst)					\
	asm("bfextu %3{%1:%2},%0"					\
	    : "=d" (dst)						\
	    : "di" (x), "di" (w), "o" (*(uint32_t *)(psrc)))

/* luna68k version PUTBITS() that puts w bits from bit x at pdst memory */
/* XXX this macro assumes (x + w) <= 32 to handle unaligned residual bits */
#define FASTPUTBITS(src, x, w, pdst)					\
	asm("bfins %3,%0{%1:%2}"					\
	    : "+o" (*(uint32_t *)(pdst))				\
	    : "di" (x), "di" (w), "d" (src)				\
	    : "memory" )

#else /* not m68k */

/*
 * Portable equivalents, to match the asm side on hosts without the m68k
 * bit field instructions.  Memory is accessed byte by byte, so they do not
 * depend on the host byte order.
 * x must not be negative and w must be in the range 1..32.
 */
static inline uint32_t
om_getbits_portable(const void *psrc, unsigned int x, unsigned int w)
{
	const uint8_t *p = (const uint8_t *)psrc + (x >> 3);
	unsigned int bit = x & 7;
	unsigned int nbytes = (bit + w + 7) >> 3;	/* 1..5 */
	uint64_t acc = 0;
	unsigned int i;

	/* gather only the bytes that the bit field actually touches */
	for (i = 0; i < nbytes; i++)
		acc = (acc << 8) | p[i];
	/* drop the bits on the right of the field, then the ones on the left */
	acc >>= nbytes * 8 - bit - w;
	return (uint32_t)(acc & (uint64_t)(0xffffffffU >> (32 - w)));
}

static inline void
om_putbits_portable(uint32_t src, unsigned int x, unsigned int w, void *pdst)
{
	uint8_t *p = (uint8_t *)pdst + (x >> 3);
	unsigned int bit = x & 7;
	unsigned int nbytes = (bit + w + 7) >> 3;	/* 1..5 */
	unsigned int shift = nbytes * 8 - bit - w;
	uint64_t field = (uint64_t)(0xffffffffU >> (32 - w)) << shift;
	uint64_t acc = 0;
	unsigned int i;

	for (i = 0; i < nbytes; i++)
		acc = (acc << 8) | p[i];
	/* replace the field, keep every bit around it */
	acc = (acc & ~field) | (((uint64_t)src << shift) & field);
	for (i = nbytes; i-- > 0; ) {
		p[i] = (uint8_t)acc;
		acc >>= 8;
	}
}

#define FASTGETBITS(psrc, x, w, dst)					\
	((dst) = om_getbits_portable((psrc), (x), (w)))
#define FASTPUTBITS(src, x, w, pdst)					\
	om_putbits_portable((src), (x), (w), (pdst))

#endif

#define GETBITS(psrc, x, w, dst)	FASTGETBITS(psrc, x, w, dst)
#define PUTBITS(src, x, w, pdst)	FASTPUTBITS(src, x, w, pdst)
166
/*
 * Clear lower w bits from x.
 * x must be filled with 1 at least lower w bits.
 *
 * Bit w is cleared first and then 1 is added: the carry ripples through the
 * lower w one-bits, clearing all of them, and sets bit w again.
 *
 * Both variants are statements (not expressions) and modify x in place.
 */
#if USE_M68K_ASM
#define CLEAR_LOWER_BITS(x, w)						\
	asm volatile(							\
	"	bclr	%[width],%[data]	;\n"			\
	"	addq.l	#1,%[data]		;\n"			\
	    : [data] "+&d" (x)						\
	    : [width] "d" (w)						\
	    :								\
	)
#else
#define CLEAR_LOWER_BITS(x, w)						\
	do {								\
		(x) = ((x) & ~(1U << (w))) + 1;				\
	} while (0)
#endif
183
184 /* Set planemask for the common plane and the common ROP */
185 static inline void
186 om_set_planemask(int planemask)
187 {
188
189 *(volatile uint32_t *)OMFB_PLANEMASK = planemask;
190 }
191
192 /* Get a ROP address */
193 static inline volatile uint32_t *
194 om_rop_addr(int plane, int rop)
195 {
196
197 return (volatile uint32_t *)
198 (OMFB_ROP_P0 + OMFB_PLANEOFFS * plane + rop * 4);
199 }
200
/* Program one plane: select the ROP and store its mask. */
static inline void
om_set_rop(int plane, int rop, uint32_t mask)
{
	volatile uint32_t *reg = om_rop_addr(plane, rop);

	*reg = mask;
}
208
209 /* Set ROP and ROP's mask for current setplanemask-ed plane(s) */
210 static inline void
211 om_set_rop_curplane(int rop, uint32_t mask)
212 {
213
214 ((volatile uint32_t *)(OMFB_ROP_COMMON))[rop] = mask;
215 }
216
217 /* Reset planemask and ROP */
218 static inline void
219 om_reset_planemask_and_rop(void)
220 {
221
222 om_set_planemask(hwplanemask);
223 om_set_rop_curplane(ROP_THROUGH, ~0U);
224 }
225
226 static inline void
227 om_set_rowattr(int row, uint8_t fg, uint8_t bg)
228 {
229
230 if (rowattr[row].fg == fg && rowattr[row].bg == bg)
231 return;
232 if (rowattr[row].ismulti)
233 return;
234
235 if (rowattr[row].fg == rowattr[row].bg) {
236 /* From the initial (erased) state, */
237 if (rowattr[row].fg != fg && rowattr[row].bg != bg) {
238 /* if both are changed at once, it's multi color */
239 rowattr[row].ismulti = true;
240 } else {
241 /* otherwise, it's single color */
242 rowattr[row].fg = fg;
243 rowattr[row].bg = bg;
244 }
245 } else {
246 rowattr[row].ismulti = true;
247 }
248 }
249
250 static inline void
251 om_reset_rowattr(int row, uint8_t bg)
252 {
253
254 /* Setting fg equal to bg means 'reset' or 'erased'. */
255 rowattr[row].ismulti = false;
256 rowattr[row].bg = bg;
257 rowattr[row].fg = bg;
258 }
259
260 /*
261 * Fill rectangle.
262 * val is assumed only ALL0BITS or ALL1BITS, because all bits are used as is
263 * regardless of bit offset of the destination.
264 */
265 static void
266 om_fill(int planemask, int rop, uint8_t *dstptr, int dstbitoffs, int dstspan,
267 uint32_t val, int width, int height)
268 {
269 uint32_t mask;
270 uint32_t prev_mask;
271 int32_t height_m1;
272 int dw; /* 1 pass width bits */
273
274 ASSUME(width > 0);
275 ASSUME(height > 0);
276 ASSUME(0 <= dstbitoffs && dstbitoffs < 32);
277
278 om_set_planemask(planemask);
279
280 height_m1 = height - 1;
281 mask = ALL1BITS >> dstbitoffs;
282 prev_mask = ~mask;
283 dw = 32 - dstbitoffs;
284
285 /* do-while loop seems slightly faster than a for loop */
286 do {
287 uint8_t *d;
288 int32_t h;
289
290 width -= dw;
291 if (width < 0) {
292 CLEAR_LOWER_BITS(mask, -width);
293 /* To exit this loop. */
294 width = 0;
295 }
296
297 if (prev_mask != mask) {
298 om_set_rop_curplane(rop, mask);
299 prev_mask = mask;
300 }
301
302 d = dstptr;
303 dstptr += 4;
304 h = height_m1;
305
306 #if USE_M68K_ASM
307 asm volatile("\n"
308 "om_fill_loop_h:\n"
309 " move.l %[val],(%[d]) ;\n"
310 " add.l %[dstspan],%[d] ;\n"
311 " dbra %[h],om_fill_loop_h ;\n"
312 : /* output */
313 [d] "+&a" (d),
314 [h] "+&d" (h)
315 : /* input */
316 [val] "d" (val),
317 [dstspan] "r" (dstspan)
318 : /* clobbers */
319 "memory"
320 );
321 #else
322 do {
323 *(uint32_t *)d = val;
324 d += dstspan;
325 } while (--h >= 0);
326 #endif
327 mask = ALL1BITS;
328 dw = 32;
329 } while (width > 0);
330 }
331
332 static void
333 om_fill_color(int planecount, int color, uint8_t *dstptr, int dstbitoffs,
334 int dstspan, int width, int height)
335 {
336 uint32_t mask;
337 uint32_t prev_mask;
338 int32_t height_m1;
339 int dw; /* 1 pass width bits */
340
341 ASSUME(width > 0);
342 ASSUME(height > 0);
343 ASSUME(planecount > 0);
344
345 /* select all planes */
346 om_set_planemask(hwplanemask);
347
348 mask = ALL1BITS >> dstbitoffs;
349 prev_mask = ~mask;
350 dw = 32 - dstbitoffs;
351 height_m1 = height - 1;
352
353 do {
354 uint8_t *d;
355 int32_t plane;
356 int32_t h;
357 int16_t rop;
358
359 width -= dw;
360 if (width < 0) {
361 CLEAR_LOWER_BITS(mask, -width);
362 /* To exit this loop. */
363 width = 0;
364 }
365
366 if (prev_mask != mask) {
367 for (plane = 0; plane < planecount; plane++) {
368 if ((color & (1U << plane)) != 0)
369 rop = ROP_ONE;
370 else
371 rop = ROP_ZERO;
372 om_set_rop(plane, rop, mask);
373 }
374 prev_mask = mask;
375 }
376
377 d = dstptr;
378 dstptr += 4;
379 h = height_m1;
380
381 #if USE_M68K_ASM
382 asm volatile("\n"
383 "om_fill_color_loop_h:\n"
384 " clr.l (%[d]) ;\n"
385 " add.l %[dstspan],%[d] ;\n"
386 " dbra %[h],om_fill_color_loop_h ;\n"
387 : /* output */
388 [d] "+&a" (d),
389 [h] "+&d" (h)
390 : /* input */
391 [dstspan] "r" (dstspan)
392 : /* clobbers */
393 "memory"
394 );
395 #else
396 do {
397 /*
398 * ROP is either ONE or ZERO,
399 * so don't care what you write to *d.
400 */
401 *(uint32_t *)d = 0;
402 d += dstspan;
403 } while (--h >= 0);
404 #endif
405 mask = ALL1BITS;
406 dw = 32;
407 } while (width > 0);
408 }
409
/*
 * Choose the ROP of one plane from the fg/bg color combination.
 * The caller invokes this once per plane while shifting fg and bg to the
 * right, so only the LSB of fg and bg is looked at.
 *
 * Every ROP value needed here happens to be a multiple of 5, so the ROP is
 * 5 times the two bit number made of (bg, fg).
 *
 *	bg fg	rop			result
 *	-- --	----------------	------
 *	0  0	ROP_ZERO    =  0	0
 *	0  1	ROP_THROUGH =  5	D
 *	1  0	ROP_INV1    = 10	~D
 *	1  1	ROP_ONE     = 15	1
 *
 * With these ROPs, a single write to the common plane draws a character in
 * the specified fg/bg colors.
 */
static inline int
om_fgbg2rop(uint8_t fg, uint8_t bg)
{
	const int fgbit = fg & 1;
	const int bgbit = bg & 1;

	return ((bgbit << 1) | fgbit) * 5;
}
435
436 /*
437 * Blit a character at the specified co-ordinates.
438 * This function modifies(breaks) the planemask and ROPs.
439 */
440 static void
441 om_putchar(void *cookie, int row, int startcol, u_int uc, long attr)
442 {
443 struct rasops_info *ri = cookie;
444 uint8_t *fontptr;
445 uint8_t *dstcmn;
446 uint32_t mask;
447 int width;
448 int height;
449 int planecount;
450 int x, y;
451 int fontstride;
452 int fontx;
453 int plane;
454 int dw; /* 1 pass width bits */
455 int xh, xl;
456 uint8_t fg, bg;
457 /* ROP address cache */
458 static volatile uint32_t *ropaddr[OMFB_MAX_PLANECOUNT];
459 static uint8_t last_fg, last_bg;
460
461 if (uc >= 0x80)
462 return;
463
464 width = ri->ri_font->fontwidth;
465 height = ri->ri_font->fontheight;
466 planecount = ri->ri_depth;
467 fontstride = ri->ri_font->stride;
468 y = height * row;
469 x = width * startcol;
470 fontptr = (uint8_t *)ri->ri_font->data +
471 (uc - ri->ri_font->firstchar) * ri->ri_fontscale;
472
473 om_unpack_attr(attr, &fg, &bg, NULL);
474 om_set_rowattr(row, fg, bg);
475
476 if (last_fg != fg || last_bg != bg) {
477 last_fg = fg;
478 last_bg = bg;
479 /* calculate ROP */
480 for (plane = 0; plane < planecount; plane++) {
481 int t = om_fgbg2rop(fg, bg);
482 ropaddr[plane] = om_rop_addr(plane, t);
483 fg >>= 1;
484 bg >>= 1;
485 }
486 }
487
488 /* divide x into the lower 5 bits and the rest. */
489 xh = x >> 5;
490 xl = x & 0x1f;
491
492 /* write to common plane */
493 dstcmn = (uint8_t *)ri->ri_bits + xh * 4 + y * OMFB_STRIDE;
494
495 /* select all plane */
496 om_set_planemask(hwplanemask);
497
498 fontx = 0;
499 mask = ALL1BITS >> xl;
500 dw = 32 - xl;
501
502 ASSUME(planecount == 1 ||
503 planecount == 4 ||
504 planecount == 8);
505
506 do {
507 uint8_t *d;
508 uint8_t *f;
509 int32_t h;
510
511 width -= dw;
512 if (width < 0) {
513 CLEAR_LOWER_BITS(mask, -width);
514 /* To exit this loop. */
515 width = 0;
516 }
517
518 switch (planecount) {
519 case 8:
520 *(ropaddr[7]) = mask;
521 *(ropaddr[6]) = mask;
522 *(ropaddr[5]) = mask;
523 *(ropaddr[4]) = mask;
524 /* FALLTHROUGH */
525 case 4:
526 *(ropaddr[3]) = mask;
527 *(ropaddr[2]) = mask;
528 *(ropaddr[1]) = mask;
529 /* FALLTHROUGH */
530 case 1:
531 *(ropaddr[0]) = mask;
532 break;
533 }
534
535 d = dstcmn;
536 f = fontptr;
537 h = height - 1;
538 do {
539 uint32_t v;
540 GETBITS(f, fontx, dw, v);
541 /* no need to shift v because it's masked by ROP */
542 *(uint32_t *)d = v;
543 d += OMFB_STRIDE;
544 f += fontstride;
545 } while (--h >= 0);
546
547 dstcmn += 4;
548 fontx += dw;
549 mask = ALL1BITS;
550 dw = 32;
551 } while (width > 0);
552
553 om_reset_planemask_and_rop();
554 }
555
556 static void
557 om_erasecols(void *cookie, int row, int startcol, int ncols, long attr)
558 {
559 struct rasops_info *ri = cookie;
560 int startx;
561 int width;
562 int height;
563 int planecount;
564 int sh, sl;
565 int y;
566 int scanspan;
567 uint8_t *p;
568 uint8_t fg, bg;
569
570 scanspan = ri->ri_stride;
571 y = ri->ri_font->fontheight * row;
572 startx = ri->ri_font->fontwidth * startcol;
573 width = ri->ri_font->fontwidth * ncols;
574 height = ri->ri_font->fontheight;
575 planecount = ri->ri_depth;
576 om_unpack_attr(attr, &fg, &bg, NULL);
577 sh = startx >> 5;
578 sl = startx & 0x1f;
579 p = (uint8_t *)ri->ri_bits + y * scanspan + sh * 4;
580
581 /* I'm not sure */
582 om_set_rowattr(row, fg, bg);
583
584 if (bg == 0) {
585 /* om_fill seems slightly efficient */
586 om_fill(hwplanemask, ROP_ZERO,
587 p, sl, scanspan, 0, width, height);
588 } else {
589 om_fill_color(planecount, bg, p, sl, scanspan, width, height);
590 }
591
592 /* reset mask value */
593 om_reset_planemask_and_rop();
594 }
595
596 static void
597 om_eraserows(void *cookie, int startrow, int nrows, long attr)
598 {
599 struct rasops_info *ri = cookie;
600 int startx;
601 int width;
602 int height;
603 int planecount;
604 int sh, sl;
605 int y;
606 int scanspan;
607 int row;
608 uint8_t *p;
609 uint8_t fg, bg;
610
611 scanspan = ri->ri_stride;
612 y = ri->ri_font->fontheight * startrow;
613 startx = 0;
614 width = ri->ri_emuwidth;
615 height = ri->ri_font->fontheight * nrows;
616 planecount = ri->ri_depth;
617 om_unpack_attr(attr, &fg, &bg, NULL);
618 sh = startx >> 5;
619 sl = startx & 0x1f;
620 p = (uint8_t *)ri->ri_bits + y * scanspan + sh * 4;
621
622 for (row = startrow; row < startrow + nrows; row++) {
623 om_reset_rowattr(row, bg);
624 }
625
626 if (bg == 0) {
627 /* om_fill seems slightly efficient */
628 om_fill(hwplanemask, ROP_ZERO,
629 p, sl, scanspan, 0, width, height);
630 } else {
631 om_fill_color(planecount, bg, p, sl, scanspan, width, height);
632 }
633 /* reset mask value */
634 om_reset_planemask_and_rop();
635 }
636
637 /*
638 * Single plane raster copy.
639 * dst: destination plane pointer.
640 * src: source plane pointer.
641 * if y-forward, src > dst, point to left-top.
642 * if y-backward, src < dst, point to left-bottom.
643 * width: pixel width (must > 0)
644 * height: pixel height (> 0 if forward, < 0 if backward)
645 * rop: ROP array with planecount elements.
646 *
647 * This function modifies(breaks) the planemask and ROPs.
648 */
649 static void
650 om_rascopy_single(int planecount, uint8_t *dst, uint8_t *src,
651 int16_t width, int16_t height, uint8_t rop[])
652 {
653 uint32_t mask;
654 int wh;
655 int wl;
656 int step;
657 int plane;
658 int16_t height_m1;
659 int16_t w, h;
660
661 step = OMFB_STRIDE;
662
663 /*
664 * X direction is always forward (or ascend order) to use (An)+
665 * addressing mode in asm.
666 */
667
668 /* Reverse order Y if backward copy */
669 if (height < 0) {
670 /* The sign is managed by step, height is always positive */
671 step = -step;
672 height = -height;
673 }
674 height_m1 = height - 1;
675
676 /*
677 * On single, it's not necessary to process two longwords at a time,
678 * but we do so for symmetry and speedup.
679 */
680
681 /* First, transfer a rectangle consist of two longwords */
682 wh = (width >> 6);
683 if (wh > 0) {
684 int step8 = step - wh * 8;
685
686 #if USE_M68K_ASM
687 wh--; /* for dbra */
688 h = height_m1;
689 asm volatile("\n"
690 "om_rascopy_single_LL:\n"
691 " move.w %[wh],%[w] ;\n"
692 "1:\n"
693 " move.l (%[src])+,(%[dst])+ ;\n"
694 " move.l (%[src])+,(%[dst])+ ;\n"
695 " dbra %[w],1b ;\n"
696
697 " adda.l %[step8],%[src] ;\n"
698 " adda.l %[step8],%[dst] ;\n"
699 " dbra %[h],om_rascopy_single_LL ;\n"
700 : /* output */
701 [src] "+&a" (src),
702 [dst] "+&a" (dst),
703 [h] "+&d" (h),
704 [w] "=&d" (w)
705 : /* input */
706 [wh] "r" (wh),
707 [step8] "r" (step8)
708 : /* clobbers */
709 "memory"
710 );
711 #else
712 wh--; /* to match to asm side */
713 for (h = height_m1; h >= 0; h--) {
714 uint32_t *s32 = (uint32_t *)src;
715 uint32_t *d32 = (uint32_t *)dst;
716 for (w = wh; w >= 0; w--) {
717 *d32++ = *s32++;
718 *d32++ = *s32++;
719 }
720 src = (uint8_t *)s32 + step8;
721 dst = (uint8_t *)d32 + step8;
722 }
723 #endif
724
725 if ((width & 0x3f) == 0) {
726 /* transfer completed */
727 return;
728 }
729
730 /* rewind y for the next transfer */
731 src -= height * step;
732 dst -= height * step;
733 }
734
735 if ((width & 32) != 0) {
736 /* Transfer one longword since an odd longword */
737 #if USE_M68K_ASM
738 h = height_m1;
739 asm volatile("\n"
740 "om_rascopy_single_L:\n"
741 " move.l (%[src]),(%[dst]) ;\n"
742 " adda.l %[step],%[src] ;\n"
743 " adda.l %[step],%[dst] ;\n"
744 " dbra %[h],om_rascopy_single_L ;\n"
745 : /* output */
746 [src] "+&a" (src),
747 [dst] "+&a" (dst),
748 [h] "+&d" (h)
749 : /* input */
750 [step] "r" (step)
751 : /* clobbers */
752 "memory"
753 );
754 #else
755 for (h = height_m1; h >= 0; h--) {
756 *(uint32_t *)dst = *(uint32_t *)src;
757 dst += step;
758 src += step;
759 }
760 #endif
761
762 if ((width & 0x1f) == 0) {
763 /* transfer completed */
764 return;
765 }
766
767 /* rewind y for the next transfer */
768 src += 4 - height * step;
769 dst += 4 - height * step;
770 }
771
772 wl = width & 0x1f;
773 /* wl > 0 at this point */
774
775 /* Then, transfer residual bits */
776
777 mask = ALL1BITS << (32 - wl);
778 /*
779 * The common ROP cannot be used here. Because the hardware doesn't
780 * allow you to set the mask while keeping the ROP states.
781 */
782 for (plane = 0; plane < planecount; plane++) {
783 om_set_rop(plane, rop[plane], mask);
784 }
785
786 #if USE_M68K_ASM
787 h = height_m1;
788 asm volatile("\n"
789 "om_rascopy_single_bit:\n"
790 " move.l (%[src]),(%[dst]) ;\n"
791 " adda.l %[step],%[src] ;\n"
792 " adda.l %[step],%[dst] ;\n"
793 " dbra %[h],om_rascopy_single_bit ;\n"
794 : /* output */
795 [src] "+&a" (src),
796 [dst] "+&a" (dst),
797 [h] "+&d" (h)
798 : /* input */
799 [step] "r" (step)
800 : /* clobbers */
801 "memory"
802 );
803 #else
804 for (h = height_m1; h >= 0; h--) {
805 *(uint32_t *)dst = *(uint32_t *)src;
806 dst += step;
807 src += step;
808 }
809 #endif
810
811 for (plane = 0; plane < planecount; plane++) {
812 om_set_rop(plane, rop[plane], ALL1BITS);
813 }
814 }
815
816 /*
817 * Multiple plane raster copy.
818 * dst0: destination pointer in Plane0.
819 * src0: source pointer in Plane0.
820 * if y-forward, src0 > dst0, point to left-top.
821 * if y-backward, src0 < dst0, point to left-bottom.
822 * width: pixel width (must > 0)
823 * height: pixel height (> 0 if forward, < 0 if backward)
824 *
825 * This function modifies(breaks) the planemask and ROPs.
826 */
827 static void
828 om4_rascopy_multi(uint8_t *dst0, uint8_t *src0, int16_t width, int16_t height)
829 {
830 uint8_t *dst1, *dst2, *dst3;
831 int wh;
832 int wl;
833 int rewind;
834 int step;
835 uint32_t mask;
836 int16_t height_m1;
837 int16_t w, h;
838
839 step = OMFB_STRIDE;
840
841 /*
842 * X direction is always forward (or ascend order) to use (An)+
843 * addressing mode in asm.
844 */
845
846 /* Reverse order Y if backward copy */
847 if (height < 0) {
848 /* The sign is managed by step, height is always positive */
849 step = -step;
850 height = -height;
851 }
852 height_m1 = height - 1;
853
854 dst1 = dst0 + OMFB_PLANEOFFS;
855 dst2 = dst1 + OMFB_PLANEOFFS;
856 dst3 = dst2 + OMFB_PLANEOFFS;
857
858 /* First, transfer a rectangle consist of two longwords */
859 wh = width >> 6;
860 if (wh > 0) {
861 int step8 = step - wh * 8;
862
863 #if USE_M68K_ASM
864 wh--; /* for dbra */
865 h = height_m1;
866 asm volatile("\n"
867 "om4_rascopy_multi_LL:\n"
868 " move.w %[wh],%[w] ;\n"
869 "1:\n"
870 /*
871 * Optimized for 68030.
872 *
873 * On LUNA, the following is faster than any of
874 * "MOVE.L (An)+,(An)+", "MOVE.L (An,Dn),(An,Dn)", or
875 * "MOVEM.L", due to the relationship of instruction
876 * overlaps and access waits.
877 *
878 * The head time of (An)+ as source operand is 0 and
879 * the head time of ADDA instruction is 2. If the
880 * previous instruction has some write wait cycles,
881 * i.e., tail cycles, (An)+ as source operand cannot
882 * overlap it but ADDA instruction can.
883 */
884 " move.l (%[src0]),(%[dst0])+ ;\n" /* P0 */
885 " adda.l %[PLANEOFFS],%[src0] ;\n"
886 " move.l (%[src0]),(%[dst1])+ ;\n" /* P1 */
887 " adda.l %[PLANEOFFS],%[src0] ;\n"
888 " move.l (%[src0]),(%[dst2])+ ;\n" /* P2 */
889 " adda.l %[PLANEOFFS],%[src0] ;\n"
890 " move.l (%[src0]),(%[dst3])+ ;\n" /* P3 */
891 /* Expect an overlap, so don't use (An)+ */
892 " addq.l #4,%[src0] ;\n"
893
894 " move.l (%[src0]),(%[dst3])+ ;\n" /* P3 */
895 " suba.l %[PLANEOFFS],%[src0] ;\n"
896 " move.l (%[src0]),(%[dst2])+ ;\n" /* P2 */
897 " suba.l %[PLANEOFFS],%[src0] ;\n"
898 " move.l (%[src0]),(%[dst1])+ ;\n" /* P1 */
899 " suba.l %[PLANEOFFS],%[src0] ;\n"
900 " move.l (%[src0])+,(%[dst0])+ ;\n" /* P0 */
901 " dbra %[w],1b ;\n"
902
903 " adda.l %[step8],%[src0] ;\n"
904 " adda.l %[step8],%[dst0] ;\n"
905 " adda.l %[step8],%[dst1] ;\n"
906 " adda.l %[step8],%[dst2] ;\n"
907 " adda.l %[step8],%[dst3] ;\n"
908 " dbra %[h],om4_rascopy_multi_LL ;\n"
909 : /* output */
910 [src0] "+&a" (src0),
911 [dst0] "+&a" (dst0),
912 [dst1] "+&a" (dst1),
913 [dst2] "+&a" (dst2),
914 [dst3] "+&a" (dst3),
915 [h] "+&d" (h),
916 [w] "=&d" (w)
917 : /* input */
918 [wh] "r" (wh),
919 [PLANEOFFS] "r" (OMFB_PLANEOFFS),
920 [step8] "r" (step8)
921 : /* clobbers */
922 "memory"
923 );
924 #else
925 wh--; /* to match to asm side */
926 for (h = height_m1; h >= 0; h--) {
927 for (w = wh; w >= 0; w--) {
928 *(uint32_t *)dst0 = *(uint32_t *)src0;
929 dst0 += 4;
930 src0 += OMFB_PLANEOFFS;
931 *(uint32_t *)dst1 = *(uint32_t *)src0;
932 dst1 += 4;
933 src0 += OMFB_PLANEOFFS;
934 *(uint32_t *)dst2 = *(uint32_t *)src0;
935 dst2 += 4;
936 src0 += OMFB_PLANEOFFS;
937 *(uint32_t *)dst3 = *(uint32_t *)src0;
938 dst3 += 4;
939 src0 += 4;
940
941 *(uint32_t *)dst3 = *(uint32_t *)src0;
942 dst3 += 4;
943 src0 -= OMFB_PLANEOFFS;
944 *(uint32_t *)dst2 = *(uint32_t *)src0;
945 dst2 += 4;
946 src0 -= OMFB_PLANEOFFS;
947 *(uint32_t *)dst1 = *(uint32_t *)src0;
948 dst1 += 4;
949 src0 -= OMFB_PLANEOFFS;
950 *(uint32_t *)dst0 = *(uint32_t *)src0;
951 dst0 += 4;
952 src0 += 4;
953 }
954 src0 += step8;
955 dst0 += step8;
956 dst1 += step8;
957 dst2 += step8;
958 dst3 += step8;
959 }
960 #endif
961
962 if ((width & 0x3f) == 0) {
963 /* transfer completed */
964 return;
965 }
966
967 /* rewind y for the next transfer */
968 src0 -= height * step;
969 dst0 -= height * step;
970 dst1 -= height * step;
971 dst2 -= height * step;
972 dst3 -= height * step;
973 }
974
975 /* This rewind rewinds the plane, so Y order is irrelevant */
976 rewind = OMFB_STRIDE - OMFB_PLANEOFFS * 3;
977
978 if ((width & 32) != 0) {
979 /* Transfer one longword since an odd longword */
980 #if USE_M68K_ASM
981 h = height_m1;
982 asm volatile("\n"
983 "om4_rascopy_multi_L:\n"
984 " move.l (%[src0]),(%[dst0]) ;\n"
985 " adda.l %[PLANEOFFS],%[src0] ;\n"
986 " move.l (%[src0]),(%[dst1]) ;\n"
987 " adda.l %[PLANEOFFS],%[src0] ;\n"
988 " move.l (%[src0]),(%[dst2]) ;\n"
989 " adda.l %[PLANEOFFS],%[src0] ;\n"
990 " move.l (%[src0]),(%[dst3]) ;\n"
991 " adda.l %[rewind],%[src0] ;\n"
992
993 " adda.l %[step],%[dst0] ;\n"
994 " adda.l %[step],%[dst1] ;\n"
995 " adda.l %[step],%[dst2] ;\n"
996 " adda.l %[step],%[dst3] ;\n"
997 " dbra %[h],om4_rascopy_multi_L ;\n"
998 : /* output */
999 [src0] "+&a" (src0),
1000 [dst0] "+&a" (dst0),
1001 [dst1] "+&a" (dst1),
1002 [dst2] "+&a" (dst2),
1003 [dst3] "+&a" (dst3),
1004 [h] "+&d" (h)
1005 : /* input */
1006 [PLANEOFFS] "r" (OMFB_PLANEOFFS),
1007 [rewind] "r" (rewind),
1008 [step] "r" (step)
1009 : /* clobbers */
1010 "memory"
1011 );
1012 #else
1013 for (h = height_m1; h >= 0; h--) {
1014 *(uint32_t *)dst0 = *(uint32_t *)src0;
1015 src0 += OMFB_PLANEOFFS;
1016 *(uint32_t *)dst1 = *(uint32_t *)src0;
1017 src0 += OMFB_PLANEOFFS;
1018 *(uint32_t *)dst2 = *(uint32_t *)src0;
1019 src0 += OMFB_PLANEOFFS;
1020 *(uint32_t *)dst3 = *(uint32_t *)src0;
1021 src0 += rewind;
1022
1023 dst0 += step;
1024 dst1 += step;
1025 dst2 += step;
1026 dst3 += step;
1027 }
1028 #endif
1029
1030 if ((width & 0x1f) == 0) {
1031 /* transfer completed */
1032 return;
1033 }
1034
1035 /* rewind y for the next transfer */
1036 src0 += 4 - height * step;
1037 dst0 += 4 - height * step;
1038 dst1 += 4 - height * step;
1039 dst2 += 4 - height * step;
1040 dst3 += 4 - height * step;
1041 }
1042
1043 wl = width & 0x1f;
1044 /* wl > 0 at this point */
1045
1046 /* Then, transfer residual bits */
1047
1048 mask = ALL1BITS << (32 - wl);
1049 om_set_planemask(hwplanemask);
1050 om_set_rop_curplane(ROP_THROUGH, mask);
1051
1052 #if USE_M68K_ASM
1053 h = height_m1;
1054 asm volatile("\n"
1055 "om4_rascopy_multi_bit:\n"
1056 " move.l (%[src0]),(%[dst0]) ;\n"
1057 " adda.l %[PLANEOFFS],%[src0] ;\n"
1058 " move.l (%[src0]),(%[dst1]) ;\n"
1059 " adda.l %[PLANEOFFS],%[src0] ;\n"
1060 " move.l (%[src0]),(%[dst2]) ;\n"
1061 " adda.l %[PLANEOFFS],%[src0] ;\n"
1062 " move.l (%[src0]),(%[dst3]) ;\n"
1063 " adda.l %[rewind],%[src0] ;\n"
1064
1065 " adda.l %[step],%[dst0] ;\n"
1066 " adda.l %[step],%[dst1] ;\n"
1067 " adda.l %[step],%[dst2] ;\n"
1068 " adda.l %[step],%[dst3] ;\n"
1069 " dbra %[h],om4_rascopy_multi_bit ;\n"
1070 : /* output */
1071 [src0] "+&a" (src0),
1072 [dst0] "+&a" (dst0),
1073 [dst1] "+&a" (dst1),
1074 [dst2] "+&a" (dst2),
1075 [dst3] "+&a" (dst3),
1076 [h] "+&d" (h)
1077 : /* input */
1078 [PLANEOFFS] "r" (OMFB_PLANEOFFS),
1079 [rewind] "r" (rewind),
1080 [step] "r" (step)
1081 : /* clobbers */
1082 "memory"
1083 );
1084 #else
1085 for (h = height_m1; h >= 0; h--) {
1086 *(uint32_t *)dst0 = *(uint32_t *)src0;
1087 src0 += OMFB_PLANEOFFS;
1088 *(uint32_t *)dst1 = *(uint32_t *)src0;
1089 src0 += OMFB_PLANEOFFS;
1090 *(uint32_t *)dst2 = *(uint32_t *)src0;
1091 src0 += OMFB_PLANEOFFS;
1092 *(uint32_t *)dst3 = *(uint32_t *)src0;
1093 src0 += rewind;
1094
1095 dst0 += step;
1096 dst1 += step;
1097 dst2 += step;
1098 dst3 += step;
1099 }
1100 #endif
1101 om_reset_planemask_and_rop();
1102 }
1103
1104 static void
1105 om1_copyrows(void *cookie, int srcrow, int dstrow, int nrows)
1106 {
1107 struct rasops_info *ri = cookie;
1108 uint8_t *p, *q;
1109 int scanspan, offset, srcy, height, width, w;
1110 uint32_t rmask;
1111
1112 scanspan = ri->ri_stride;
1113 height = ri->ri_font->fontheight * nrows;
1114 offset = (dstrow - srcrow) * scanspan * ri->ri_font->fontheight;
1115 srcy = ri->ri_font->fontheight * srcrow;
1116 if (srcrow < dstrow && srcrow + nrows > dstrow) {
1117 scanspan = -scanspan;
1118 srcy = srcy + height - 1;
1119 }
1120
1121 p = (uint8_t *)ri->ri_bits + srcy * ri->ri_stride;
1122 w = ri->ri_emuwidth;
1123 width = w;
1124 rmask = ALL1BITS << (-width & ALIGNMASK);
1125 q = p;
1126 while (height > 0) {
1127 *P0(p + offset) = *P0(p); /* always aligned */
1128 width -= 2 * BLITWIDTH;
1129 while (width > 0) {
1130 p += BYTESDONE;
1131 *P0(p + offset) = *P0(p);
1132 width -= BLITWIDTH;
1133 }
1134 p += BYTESDONE;
1135 *P0(p + offset) = (*P0(p) & rmask) | (*P0(p + offset) & ~rmask);
1136
1137 p = (q += scanspan);
1138 width = w;
1139 height--;
1140 }
1141 }
1142
/*
 * Copy 'nrows' text rows starting at 'srcrow' to 'dstrow' (4bpp variant).
 *
 * cookie is the struct rasops_info of the screen.
 *
 * When the destination lies below the source (srcrow < dstrow) the copy
 * starts at the bottom raster of the last row and walks upwards (rowstep
 * is -1 and rowheight is negated); otherwise it walks downwards.
 * Consecutive source rows whose rowattr[].all are equal are handed to the
 * raster copy routine in one call.  rowattr[] of each destination row is
 * updated to the attribute of its source row.
 */
static void
om4_copyrows(void *cookie, int srcrow, int dstrow, int nrows)
{
	struct rasops_info *ri = cookie;
	uint8_t *src, *dst;
	int width, rowheight;
	int planecount;
	int ptrstep, rowstep;
	int srcplane;
	int i;
	int r;
	uint8_t rop[OMFB_MAX_PLANECOUNT];

	width = ri->ri_emuwidth;
	rowheight = ri->ri_font->fontheight;
	planecount = ri->ri_depth;
	/* top raster of the first source and destination row */
	src = (uint8_t *)ri->ri_bits + srcrow * rowheight * ri->ri_stride;
	dst = (uint8_t *)ri->ri_bits + dstrow * rowheight * ri->ri_stride;

	if (nrows <= 0 || srcrow == dstrow) {
		/* nothing to do */
		return;
	} else if (srcrow < dstrow) {
		/* y-backward */

		/* select the bottom raster of the bottom row */
		srcrow += nrows - 1;
		dstrow += nrows - 1;
		src += nrows * rowheight * ri->ri_stride - ri->ri_stride;
		dst += nrows * rowheight * ri->ri_stride - ri->ri_stride;
		rowstep = -1;
		/* a negative height is passed on to the raster copy routines */
		rowheight = -rowheight;
	} else {
		/* y-forward */
		rowstep = 1;
	}
	/* signed byte distance from one text row to the next one to copy */
	ptrstep = ri->ri_stride * rowheight;

	om_set_planemask(hwplanemask);

	srcplane = 0;
	while (nrows > 0) {
		r = 1;
		/*
		 * A single-color row whose fg equals bg, and whose attribute
		 * already matches the destination row, needs no raster copy.
		 */
		if (rowattr[srcrow].ismulti == false &&
		    rowattr[srcrow].fg == rowattr[srcrow].bg &&
		    rowattr[srcrow].all == rowattr[dstrow].all) {
			goto skip;
		}

		/* count the number of rows with the same attributes */
		for (; r < nrows; r++) {
			if (rowattr[srcrow + r * rowstep].all !=
			    rowattr[srcrow].all) {
				break;
			}
		}
		/* r is the number of rows including srcrow itself */

		if (rowattr[srcrow].ismulti) {
			/*
			 * src,dst point to the common plane.  src0,dst0 will
			 * point to the same offset in plane0 because plane0
			 * is placed just after the common plane.
			 */
			uint8_t *src0 = src + OMFB_PLANEOFFS;
			uint8_t *dst0 = dst + OMFB_PLANEOFFS;
			om_set_rop_curplane(ROP_THROUGH, ALL1BITS);
			om4_rascopy_multi(dst0, src0, width, rowheight * r);
		} else {
			uint8_t *srcp;
			uint8_t fg;
			uint8_t bg;
			uint8_t set;

			fg = rowattr[srcrow].fg;
			bg = rowattr[srcrow].bg;
			/* bits (planes) in which fg and bg differ */
			set = fg ^ bg;
			if (set == 0) {
				/* use fg since both can be acceptable */
				set = fg;
			} else if ((set & fg) != 0) {
				/*
				 * set is the set of bits that are set in fg
				 * and cleared in bg.
				 */
				set &= fg;
			} else {
				/*
				 * otherwise, set is the set of bits that are
				 * (probably) set in bg and cleared in fg.
				 */
				uint8_t tmp;

				set &= bg;
				/* and swap fg and bg */
				tmp = fg;
				fg = bg;
				bg = tmp;
			}

			/*
			 * Compute and program one ROP per plane from the
			 * corresponding fg/bg bit; bit 0 belongs to plane 0.
			 */
			for (i = 0; i < planecount; i++) {
				int t = om_fgbg2rop(fg, bg);
				rop[i] = t;
				om_set_rop(i, rop[i], ALL1BITS);
				fg >>= 1;
				bg >>= 1;
			}

			/*
			 * If any bit in 'set' is set, any of them can be used.
			 * If all bits in 'set' are cleared, use plane 0.
			 * srcplane is the plane where fg is set and bg is
			 * cleared.  (The highest such bit is the one chosen.)
			 */
			srcplane = (set != 0) ? (31 - __builtin_clz(set)) : 0;

			/* plane N starts (N + 1) * OMFB_PLANEOFFS after src */
			srcp = src + OMFB_PLANEOFFS + srcplane * OMFB_PLANEOFFS;
			om_rascopy_single(planecount, dst, srcp,
			    width, rowheight * r, rop);
		}

skip:
		/* record the new attributes and advance past the r rows */
		for (i = 0; i < r; i++) {
			rowattr[dstrow] = rowattr[srcrow];

			srcrow += rowstep;
			dstrow += rowstep;
			src += ptrstep;
			dst += ptrstep;
			nrows--;
		}
	}
}
1275
1276 /*
1277 * XXX om{1,4}_copycols can be merged, but these are not frequently executed
1278 * and have low execution costs. So I'm putting it off for now.
1279 */
1280
/*
 * Copy 'ncols' character cells within text row 'startrow' from column
 * 'srccol' to column 'dstcol' (1bpp variant; only P0 is touched).
 *
 * cookie is the struct rasops_info of the screen.
 *
 * The destination span is split into an optional partial left word, a
 * number of whole 32-bit words and an optional partial right word.  The
 * copy runs left-to-right or right-to-left depending on the relative
 * position of source and destination.
 *
 * NOTE(review): GETBITS()/PUTBITS() are defined outside this chunk; they
 * are presumably "fetch/store w bits starting at bit offset x" -- confirm
 * against their definitions.
 */
static void
om1_copycols(void *cookie, int startrow, int srccol, int dstcol, int ncols)
{
	struct rasops_info *ri = cookie;
	uint8_t *sp, *dp, *sq, *dq, *basep;
	int scanspan, height, w, y, srcx, dstx;
	int sb, eb, db, sboff, full, cnt, lnum, rnum;
	uint32_t lmask, rmask, tmp;
	bool sbover;

	scanspan = ri->ri_stride;		/* pointer step per raster */
	y = ri->ri_font->fontheight * startrow;	/* top raster of the row */
	srcx = ri->ri_font->fontwidth * srccol;	/* source x in pixels */
	dstx = ri->ri_font->fontwidth * dstcol;	/* destination x in pixels */
	height = ri->ri_font->fontheight;	/* rasters to copy */
	w = ri->ri_font->fontwidth * ncols;	/* width to copy in pixels */
	basep = (uint8_t *)ri->ri_bits + y * scanspan;

	/* bit offsets of the source/destination start inside their word */
	sb = srcx & ALIGNMASK;
	db = dstx & ALIGNMASK;

	om_reset_planemask_and_rop();

	if (db + w <= BLITWIDTH) {
		/* Destination is contained within a single word */
		sp = basep + (srcx / 32) * 4;
		dp = basep + (dstx / 32) * 4;

		while (height > 0) {
			GETBITS(P0(sp), sb, w, tmp);
			PUTBITS(tmp, db, w, P0(dp));
			dp += scanspan;
			sp += scanspan;
			height--;
		}
		return;
	}

	/* lmask/rmask are non-zero iff a partial left/right word exists */
	lmask = (db == 0) ? 0 : ALL1BITS >> db;
	eb = (db + w) & ALIGNMASK;
	rmask = (eb == 0) ? 0 : ALL1BITS << (32 - eb);
	/* number of bits in the partial left and right destination words */
	lnum = (32 - db) & ALIGNMASK;
	rnum = (dstx + w) & ALIGNMASK;

	/* number of whole 32-bit destination words */
	if (lmask != 0)
		full = (w - (32 - db)) / 32;
	else
		full = w / 32;

	/* true if the left part reaches the end of the first source word */
	sbover = (sb + lnum) >= 32;

	if (dstcol < srccol || srccol + ncols < dstcol) {
		/* copy forward (left-to-right) */
		sp = basep + (srcx / 32) * 4;
		dp = basep + (dstx / 32) * 4;

		/* source bit offset once the left part has been consumed */
		if (lmask != 0) {
			sboff = sb + lnum;
			if (sboff >= 32)
				sboff -= 32;
		} else {
			sboff = sb;
		}

		/* sq/dq remember the start of the current raster */
		sq = sp;
		dq = dp;
		while (height > 0) {
			/* partial left word */
			if (lmask != 0) {
				GETBITS(P0(sp), sb, lnum, tmp);
				PUTBITS(tmp, db, lnum, P0(dp));
				dp += BYTESDONE;
				if (sbover)
					sp += BYTESDONE;
			}

			/* whole words */
			for (cnt = full; cnt; cnt--) {
				GETBITS(P0(sp), sboff, 32, tmp);
				*P0(dp) = tmp;
				sp += BYTESDONE;
				dp += BYTESDONE;
			}

			/* partial right word */
			if (rmask != 0) {
				GETBITS(P0(sp), sboff, rnum, tmp);
				PUTBITS(tmp, 0, rnum, P0(dp));
			}

			/* next raster */
			sp = (sq += scanspan);
			dp = (dq += scanspan);
			height--;
		}
	} else {
		/* copy backward (right-to-left) */
		/* start at the words containing the right end of the spans */
		sp = basep + ((srcx + w) / 32) * 4;
		dp = basep + ((dstx + w) / 32) * 4;

		/*
		 * Source bit offset matching the start of a destination
		 * word; step back one source word if it underflows.
		 */
		sboff = (srcx + w) & ALIGNMASK;
		sboff -= rnum;
		if (sboff < 0) {
			sp -= BYTESDONE;
			sboff += 32;
		}

		/* sq/dq remember the start of the current raster */
		sq = sp;
		dq = dp;
		while (height > 0) {
			/* partial right word */
			if (rnum != 0) {
				GETBITS(P0(sp), sboff, rnum, tmp);
				PUTBITS(tmp, 0, rnum, P0(dp));
			}

			/* whole words, moving leftwards */
			for (cnt = full; cnt; cnt--) {
				sp -= BYTESDONE;
				dp -= BYTESDONE;
				GETBITS(P0(sp), sboff, 32, tmp);
				*P0(dp) = tmp;
			}

			/* partial left word */
			if (lmask != 0) {
				if (sbover)
					sp -= BYTESDONE;
				dp -= BYTESDONE;
				GETBITS(P0(sp), sb, lnum, tmp);
				PUTBITS(tmp, db, lnum, P0(dp));
			}

			/* next raster */
			sp = (sq += scanspan);
			dp = (dq += scanspan);
			height--;
		}
	}
}
1413
/*
 * Copy 'ncols' character cells within text row 'startrow' from column
 * 'srccol' to column 'dstcol' (4bpp variant).
 *
 * cookie is the struct rasops_info of the screen.
 *
 * This follows the same scheme as om1_copycols() but repeats every
 * transfer for the four planes P0..P3: the destination span is split into
 * an optional partial left word, whole 32-bit words and an optional
 * partial right word, and is copied left-to-right or right-to-left
 * depending on the relative position of source and destination.
 *
 * NOTE(review): GETBITS()/PUTBITS() are defined outside this chunk; they
 * are presumably "fetch/store w bits starting at bit offset x" -- confirm
 * against their definitions.
 */
static void
om4_copycols(void *cookie, int startrow, int srccol, int dstcol, int ncols)
{
	struct rasops_info *ri = cookie;
	uint8_t *sp, *dp, *sq, *dq, *basep;
	int scanspan, height, w, y, srcx, dstx;
	int sb, eb, db, sboff, full, cnt, lnum, rnum;
	uint32_t lmask, rmask, tmp;
	bool sbover;

	scanspan = ri->ri_stride;		/* pointer step per raster */
	y = ri->ri_font->fontheight * startrow;	/* top raster of the row */
	srcx = ri->ri_font->fontwidth * srccol;	/* source x in pixels */
	dstx = ri->ri_font->fontwidth * dstcol;	/* destination x in pixels */
	height = ri->ri_font->fontheight;	/* rasters to copy */
	w = ri->ri_font->fontwidth * ncols;	/* width to copy in pixels */
	basep = (uint8_t *)ri->ri_bits + y * scanspan;

	/* bit offsets of the source/destination start inside their word */
	sb = srcx & ALIGNMASK;
	db = dstx & ALIGNMASK;

	om_reset_planemask_and_rop();

	if (db + w <= BLITWIDTH) {
		/* Destination is contained within a single word */
		sp = basep + (srcx / 32) * 4;
		dp = basep + (dstx / 32) * 4;

		while (height > 0) {
			GETBITS(P0(sp), sb, w, tmp);
			PUTBITS(tmp, db, w, P0(dp));
			GETBITS(P1(sp), sb, w, tmp);
			PUTBITS(tmp, db, w, P1(dp));
			GETBITS(P2(sp), sb, w, tmp);
			PUTBITS(tmp, db, w, P2(dp));
			GETBITS(P3(sp), sb, w, tmp);
			PUTBITS(tmp, db, w, P3(dp));
			dp += scanspan;
			sp += scanspan;
			height--;
		}
		return;
	}

	/* lmask/rmask are non-zero iff a partial left/right word exists */
	lmask = (db == 0) ? 0 : ALL1BITS >> db;
	eb = (db + w) & ALIGNMASK;
	rmask = (eb == 0) ? 0 : ALL1BITS << (32 - eb);
	/* number of bits in the partial left and right destination words */
	lnum = (32 - db) & ALIGNMASK;
	rnum = (dstx + w) & ALIGNMASK;

	/* number of whole 32-bit destination words */
	if (lmask != 0)
		full = (w - (32 - db)) / 32;
	else
		full = w / 32;

	/* true if the left part reaches the end of the first source word */
	sbover = (sb + lnum) >= 32;

	if (dstcol < srccol || srccol + ncols < dstcol) {
		/* copy forward (left-to-right) */
		sp = basep + (srcx / 32) * 4;
		dp = basep + (dstx / 32) * 4;

		/* source bit offset once the left part has been consumed */
		if (lmask != 0) {
			sboff = sb + lnum;
			if (sboff >= 32)
				sboff -= 32;
		} else {
			sboff = sb;
		}

		/* sq/dq remember the start of the current raster */
		sq = sp;
		dq = dp;
		while (height > 0) {
			/* partial left word, all four planes */
			if (lmask != 0) {
				GETBITS(P0(sp), sb, lnum, tmp);
				PUTBITS(tmp, db, lnum, P0(dp));
				GETBITS(P1(sp), sb, lnum, tmp);
				PUTBITS(tmp, db, lnum, P1(dp));
				GETBITS(P2(sp), sb, lnum, tmp);
				PUTBITS(tmp, db, lnum, P2(dp));
				GETBITS(P3(sp), sb, lnum, tmp);
				PUTBITS(tmp, db, lnum, P3(dp));
				dp += BYTESDONE;
				if (sbover)
					sp += BYTESDONE;
			}

			/* whole words */
			for (cnt = full; cnt; cnt--) {
				GETBITS(P0(sp), sboff, 32, tmp);
				*P0(dp) = tmp;
				GETBITS(P1(sp), sboff, 32, tmp);
				*P1(dp) = tmp;
				GETBITS(P2(sp), sboff, 32, tmp);
				*P2(dp) = tmp;
				GETBITS(P3(sp), sboff, 32, tmp);
				*P3(dp) = tmp;
				sp += BYTESDONE;
				dp += BYTESDONE;
			}

			/* partial right word */
			if (rmask != 0) {
				GETBITS(P0(sp), sboff, rnum, tmp);
				PUTBITS(tmp, 0, rnum, P0(dp));
				GETBITS(P1(sp), sboff, rnum, tmp);
				PUTBITS(tmp, 0, rnum, P1(dp));
				GETBITS(P2(sp), sboff, rnum, tmp);
				PUTBITS(tmp, 0, rnum, P2(dp));
				GETBITS(P3(sp), sboff, rnum, tmp);
				PUTBITS(tmp, 0, rnum, P3(dp));
			}

			/* next raster */
			sp = (sq += scanspan);
			dp = (dq += scanspan);
			height--;
		}
	} else {
		/* copy backward (right-to-left) */
		/* start at the words containing the right end of the spans */
		sp = basep + ((srcx + w) / 32) * 4;
		dp = basep + ((dstx + w) / 32) * 4;

		/*
		 * Source bit offset matching the start of a destination
		 * word; step back one source word if it underflows.
		 */
		sboff = (srcx + w) & ALIGNMASK;
		sboff -= rnum;
		if (sboff < 0) {
			sp -= BYTESDONE;
			sboff += 32;
		}

		/* sq/dq remember the start of the current raster */
		sq = sp;
		dq = dp;
		while (height > 0) {
			/* partial right word, all four planes */
			if (rnum != 0) {
				GETBITS(P0(sp), sboff, rnum, tmp);
				PUTBITS(tmp, 0, rnum, P0(dp));
				GETBITS(P1(sp), sboff, rnum, tmp);
				PUTBITS(tmp, 0, rnum, P1(dp));
				GETBITS(P2(sp), sboff, rnum, tmp);
				PUTBITS(tmp, 0, rnum, P2(dp));
				GETBITS(P3(sp), sboff, rnum, tmp);
				PUTBITS(tmp, 0, rnum, P3(dp));
			}

			/* whole words, moving leftwards */
			for (cnt = full; cnt; cnt--) {
				sp -= BYTESDONE;
				dp -= BYTESDONE;
				GETBITS(P0(sp), sboff, 32, tmp);
				*P0(dp) = tmp;
				GETBITS(P1(sp), sboff, 32, tmp);
				*P1(dp) = tmp;
				GETBITS(P2(sp), sboff, 32, tmp);
				*P2(dp) = tmp;
				GETBITS(P3(sp), sboff, 32, tmp);
				*P3(dp) = tmp;
			}

			/* partial left word */
			if (lmask != 0) {
				if (sbover)
					sp -= BYTESDONE;
				dp -= BYTESDONE;
				GETBITS(P0(sp), sb, lnum, tmp);
				PUTBITS(tmp, db, lnum, P0(dp));
				GETBITS(P1(sp), sb, lnum, tmp);
				PUTBITS(tmp, db, lnum, P1(dp));
				GETBITS(P2(sp), sb, lnum, tmp);
				PUTBITS(tmp, db, lnum, P2(dp));
				GETBITS(P3(sp), sb, lnum, tmp);
				PUTBITS(tmp, db, lnum, P3(dp));
			}

			/* next raster */
			sp = (sq += scanspan);
			dp = (dq += scanspan);
			height--;
		}
	}
}
1588
1589 /*
1590 * Map a character.
1591 */
1592 static int
1593 om_mapchar(void *cookie, int c, u_int *cp)
1594 {
1595 struct rasops_info *ri = cookie;
1596 struct wsdisplay_font *wf = ri->ri_font;
1597
1598 if (wf->encoding != WSDISPLAY_FONTENC_ISO) {
1599 c = wsfont_map_unichar(wf, c);
1600
1601 if (c < 0)
1602 goto fail;
1603 }
1604 if (c < wf->firstchar || c >= (wf->firstchar + wf->numchars))
1605 goto fail;
1606
1607 *cp = c;
1608 return 5;
1609
1610 fail:
1611 *cp = ' ';
1612 return 0;
1613 }
1614
1615 /*
1616 * Position|{enable|disable} the cursor at the specified location.
1617 */
1618 static void
1619 om_cursor(void *cookie, int on, int row, int col)
1620 {
1621 struct rasops_info *ri = cookie;
1622 int startx;
1623 int width;
1624 int height;
1625 int sh, sl;
1626 int y;
1627 int scanspan;
1628 uint8_t *p;
1629
1630 if (!on) {
1631 /* make sure it's on */
1632 if ((ri->ri_flg & RI_CURSOR) == 0)
1633 return;
1634
1635 row = ri->ri_crow;
1636 col = ri->ri_ccol;
1637 } else {
1638 /* unpaint the old copy. */
1639 ri->ri_crow = row;
1640 ri->ri_ccol = col;
1641 }
1642
1643 scanspan = ri->ri_stride;
1644 y = ri->ri_font->fontheight * row;
1645 startx = ri->ri_font->fontwidth * col;
1646 width = ri->ri_font->fontwidth;
1647 height = ri->ri_font->fontheight;
1648 sh = startx >> 5;
1649 sl = startx & 0x1f;
1650 p = (uint8_t *)ri->ri_bits + y * scanspan + sh * 4;
1651
1652 /* ROP_INV2 ignores data from MPU and inverts the current VRAM data */
1653 om_fill(hwplanemask, ROP_INV2, p, sl, scanspan, 0, width, height);
1654
1655 ri->ri_flg ^= RI_CURSOR;
1656
1657 /* reset mask value */
1658 om_reset_planemask_and_rop();
1659 }
1660
1661 /*
1662 * Allocate attribute. We just pack these into an integer.
1663 *
1664 * Attribute bitmap:
1665 * b31: Multi color (used by copyrows)
1666 * b30-18: 0 (reserved)
1667 * b17: Underline (not supported yet)
1668 * b16: Bold (or HILIT if 1bpp; not supported yet)
1669 * b15-8: fg color code
1670 * b7-0: bg color code
1671 */
1672 #if 0
1673 /*
1674 * Future plan:
1675 * Place fg and bg side by side in advance to reduce the computation cost
1676 * at the time of ROP setting.
1677 *
1678 * bit: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1679 * f7 b7 f6 b6 f5 b5 f4 b4 f3 b3 f2 b2 f1 b1 f0 b0
1680 *
1681 * In this form, use bit1..0 if 1bpp, use bit7..0 if 4bpp.
1682 */
1683 #endif
1684 static int
1685 om_allocattr(void *cookie, int fg, int bg, int flags, long *attrp)
1686 {
1687 struct rasops_info *ri = cookie;
1688 int planecount = ri->ri_depth;
1689 uint32_t a;
1690 uint16_t c;
1691
1692 a = 0;
1693 c = 0;
1694
1695 if ((flags & WSATTR_BLINK) != 0)
1696 return EINVAL;
1697
1698 if ((flags & WSATTR_WSCOLORS) == 0) {
1699 fg = WSCOL_WHITE; /* maybe 7 or 1 */
1700 bg = WSCOL_BLACK; /* maybe 0 */
1701 }
1702
1703 if ((flags & WSATTR_REVERSE) != 0) {
1704 int tmp;
1705 tmp = fg;
1706 fg = bg;
1707 bg = tmp;
1708 }
1709
1710 if ((flags & WSATTR_HILIT) != 0) {
1711 if (planecount == 1) {
1712 #if 0
1713 a |= OMFB_ATTR_BOLD;
1714 #else
1715 return EINVAL;
1716 #endif
1717 } else if (fg < 8) {
1718 fg += 8;
1719 }
1720 }
1721
1722 if ((flags & WSATTR_UNDERLINE) != 0) {
1723 #if 0
1724 a |= OMFB_ATTR_UNDERLINE;
1725 #else
1726 return EINVAL;
1727 #endif
1728 }
1729
1730 fg &= hwplanemask;
1731 bg &= hwplanemask;
1732
1733 #if 0
1734 int i;
1735 for (i = 0; i < planecount; i++) {
1736 c += c;
1737 c += ((fg & 1) << 1) | (bg & 1);
1738 fg >>= 1;
1739 bg >>= 1;
1740 }
1741 #else
1742 c = (fg << 8) | bg;
1743 #endif
1744 a |= c;
1745
1746 *attrp = a;
1747 return 0;
1748 }
1749
1750 static void
1751 om_unpack_attr(long attr, uint8_t *fg, uint8_t *bg, int *underline)
1752 {
1753 uint8_t f, b;
1754
1755 f = (attr >> 8) & hwplanemask;
1756 b = attr & hwplanemask;
1757
1758 if (fg)
1759 *fg = f;
1760 if (bg)
1761 *bg = b;
1762 }
1763
1764 /*
1765 * Init subset of rasops(9) for omrasops.
1766 */
1767 int
1768 omrasops1_init(struct rasops_info *ri, int wantrows, int wantcols)
1769 {
1770
1771 omrasops_init(ri, wantrows, wantcols);
1772
1773 /* fill our own emulops */
1774 ri->ri_ops.cursor = om_cursor;
1775 ri->ri_ops.mapchar = om_mapchar;
1776 ri->ri_ops.putchar = om_putchar;
1777 ri->ri_ops.copycols = om1_copycols;
1778 ri->ri_ops.erasecols = om_erasecols;
1779 ri->ri_ops.copyrows = om1_copyrows;
1780 ri->ri_ops.eraserows = om_eraserows;
1781 ri->ri_ops.allocattr = om_allocattr;
1782 ri->ri_caps = WSSCREEN_REVERSE;
1783
1784 ri->ri_flg |= RI_CFGDONE;
1785
1786 return 0;
1787 }
1788
1789 int
1790 omrasops4_init(struct rasops_info *ri, int wantrows, int wantcols)
1791 {
1792
1793 omrasops_init(ri, wantrows, wantcols);
1794
1795 /* fill our own emulops */
1796 ri->ri_ops.cursor = om_cursor;
1797 ri->ri_ops.mapchar = om_mapchar;
1798 ri->ri_ops.putchar = om_putchar;
1799 ri->ri_ops.copycols = om4_copycols;
1800 ri->ri_ops.erasecols = om_erasecols;
1801 ri->ri_ops.copyrows = om4_copyrows;
1802 ri->ri_ops.eraserows = om_eraserows;
1803 ri->ri_ops.allocattr = om_allocattr;
1804 ri->ri_caps = WSSCREEN_HILIT | WSSCREEN_WSCOLORS | WSSCREEN_REVERSE;
1805
1806 ri->ri_flg |= RI_CFGDONE;
1807
1808 return 0;
1809 }
1810
1811 static int
1812 omrasops_init(struct rasops_info *ri, int wantrows, int wantcols)
1813 {
1814 int wsfcookie, bpp;
1815
1816 if (wantrows > OMRASOPS_MAX_ROWS)
1817 wantrows = OMRASOPS_MAX_ROWS;
1818 if (wantrows == 0)
1819 wantrows = 34;
1820 if (wantrows < 10)
1821 wantrows = 10;
1822 if (wantcols == 0)
1823 wantcols = 80;
1824 if (wantcols < 20)
1825 wantcols = 20;
1826
1827 /* Use default font */
1828 wsfont_init();
1829 wsfcookie = wsfont_find(NULL, 0, 0, 0, WSDISPLAY_FONTORDER_L2R,
1830 WSDISPLAY_FONTORDER_L2R, WSFONT_FIND_BITMAP);
1831 if (wsfcookie < 0)
1832 panic("%s: no font available", __func__);
1833 if (wsfont_lock(wsfcookie, &ri->ri_font))
1834 panic("%s: unable to lock font", __func__);
1835 ri->ri_wsfcookie = wsfcookie;
1836
1837 KASSERT(ri->ri_font->fontwidth > 4 && ri->ri_font->fontwidth <= 32);
1838
1839 /* all planes are independently addressed */
1840 bpp = 1;
1841
1842 /* Now constrain what they get */
1843 ri->ri_emuwidth = ri->ri_font->fontwidth * wantcols;
1844 ri->ri_emuheight = ri->ri_font->fontheight * wantrows;
1845 if (ri->ri_emuwidth > ri->ri_width)
1846 ri->ri_emuwidth = ri->ri_width;
1847 if (ri->ri_emuheight > ri->ri_height)
1848 ri->ri_emuheight = ri->ri_height;
1849
1850 /* Reduce width until aligned on a 32-bit boundary */
1851 while ((ri->ri_emuwidth * bpp & 31) != 0)
1852 ri->ri_emuwidth--;
1853
1854 ri->ri_cols = ri->ri_emuwidth / ri->ri_font->fontwidth;
1855 ri->ri_rows = ri->ri_emuheight / ri->ri_font->fontheight;
1856 ri->ri_emustride = ri->ri_emuwidth * bpp >> 3;
1857 ri->ri_ccol = 0;
1858 ri->ri_crow = 0;
1859 ri->ri_pelbytes = bpp >> 3;
1860
1861 ri->ri_xscale = (ri->ri_font->fontwidth * bpp) >> 3;
1862 ri->ri_yscale = ri->ri_font->fontheight * ri->ri_stride;
1863 ri->ri_fontscale = ri->ri_font->fontheight * ri->ri_font->stride;
1864
1865 /* Clear the entire display */
1866 if ((ri->ri_flg & RI_CLEAR) != 0)
1867 memset(ri->ri_bits, 0, ri->ri_stride * ri->ri_height);
1868
1869 /* Now centre our window if needs be */
1870 ri->ri_origbits = ri->ri_bits;
1871
1872 if ((ri->ri_flg & RI_CENTER) != 0) {
1873 ri->ri_bits += (((ri->ri_width * bpp >> 3) -
1874 ri->ri_emustride) >> 1) & ~3;
1875 ri->ri_bits += ((ri->ri_height - ri->ri_emuheight) >> 1) *
1876 ri->ri_stride;
1877 ri->ri_yorigin = (int)(ri->ri_bits - ri->ri_origbits)
1878 / ri->ri_stride;
1879 ri->ri_xorigin = (((int)(ri->ri_bits - ri->ri_origbits)
1880 % ri->ri_stride) * 8 / bpp);
1881 } else
1882 ri->ri_xorigin = ri->ri_yorigin = 0;
1883
1884 return 0;
1885 }
1886