1/*
2 * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28#include "radeon_screen.h"
29#include "radeon_tile.h"
30
31#include <stdint.h>
32#include <string.h>
33
34#include "main/macros.h"
35#include "radeon_debug.h"
36
/*
 * Size of one micro tile in bytes.  The tile/untile helpers for 8- to 64-bit
 * elements divide it by the element size to step from one tile to the next.
 */
#define MICRO_TILE_SIZE 32
38
39static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
40                                  void * const dst, unsigned dst_pitch,
41                                  unsigned width, unsigned height)
42{
43    unsigned row; /* current source row */
44    unsigned col; /* current source column */
45    unsigned k; /* number of processed tiles */
46    const unsigned tile_width = 8, tile_height = 4;
47    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
48
49    k = 0;
50    for (row = 0; row < height; row += tile_height)
51    {
52        for (col = 0; col < width; col += tile_width, ++k)
53        {
54            uint8_t *src2 = (uint8_t *)src + src_pitch * row + col;
55            uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch +
56                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
57            unsigned j;
58
59            for (j = 0; j < MIN2(tile_height, height - row); ++j)
60            {
61                unsigned columns = MIN2(tile_width, width - col);
62                memcpy(dst2, src2, columns * sizeof(uint8_t));
63                dst2 += tile_width;
64                src2 += src_pitch;
65            }
66        }
67    }
68}
69
70static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
71                                   void * const dst, unsigned dst_pitch,
72                                   unsigned width, unsigned height)
73{
74    unsigned row; /* current source row */
75    unsigned col; /* current source column */
76    unsigned k; /* number of processed tiles */
77    const unsigned tile_width = 4, tile_height = 4;
78    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
79
80    k = 0;
81    for (row = 0; row < height; row += tile_height)
82    {
83        for (col = 0; col < width; col += tile_width, ++k)
84        {
85            uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
86            uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
87                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
88            unsigned j;
89
90            for (j = 0; j < MIN2(tile_height, height - row); ++j)
91            {
92                unsigned columns = MIN2(tile_width, width - col);
93                memcpy(dst2, src2, columns * sizeof(uint16_t));
94                dst2 += tile_width;
95                src2 += src_pitch;
96            }
97        }
98    }
99}
100
101static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
102                                   void * const dst, unsigned dst_pitch,
103                                   unsigned width, unsigned height)
104{
105    unsigned row; /* current source row */
106    unsigned col; /* current source column */
107    unsigned k; /* number of processed tiles */
108    const unsigned tile_width = 8, tile_height = 2;
109    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
110
111    k = 0;
112    for (row = 0; row < height; row += tile_height)
113    {
114        for (col = 0; col < width; col += tile_width, ++k)
115        {
116            uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
117            uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
118                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
119            unsigned j;
120
121            for (j = 0; j < MIN2(tile_height, height - row); ++j)
122            {
123                unsigned columns = MIN2(tile_width, width - col);
124                memcpy(dst2, src2, columns * sizeof(uint16_t));
125                dst2 += tile_width;
126                src2 += src_pitch;
127            }
128        }
129    }
130}
131
132static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
133                                   void * const dst, unsigned dst_pitch,
134                                   unsigned width, unsigned height)
135{
136    unsigned row; /* current source row */
137    unsigned col; /* current source column */
138    unsigned k; /* number of processed tiles */
139    const unsigned tile_width = 4, tile_height = 2;
140    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
141
142    k = 0;
143    for (row = 0; row < height; row += tile_height)
144    {
145        for (col = 0; col < width; col += tile_width, ++k)
146        {
147            uint32_t *src2 = (uint32_t *)src + src_pitch * row + col;
148            uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch +
149                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
150            unsigned j;
151
152            for (j = 0; j < MIN2(tile_height, height - row); ++j)
153            {
154                unsigned columns = MIN2(tile_width, width - col);
155                memcpy(dst2, src2, columns * sizeof(uint32_t));
156                dst2 += tile_width;
157                src2 += src_pitch;
158            }
159        }
160    }
161}
162
163static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
164                                   void * const dst, unsigned dst_pitch,
165                                   unsigned width, unsigned height)
166{
167    unsigned row; /* current source row */
168    unsigned col; /* current source column */
169    unsigned k; /* number of processed tiles */
170    const unsigned tile_width = 2, tile_height = 2;
171    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
172
173    k = 0;
174    for (row = 0; row < height; row += tile_height)
175    {
176        for (col = 0; col < width; col += tile_width, ++k)
177        {
178            uint64_t *src2 = (uint64_t *)src + src_pitch * row + col;
179            uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch +
180                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
181            unsigned j;
182
183            for (j = 0; j < MIN2(tile_height, height - row); ++j)
184            {
185                unsigned columns = MIN2(tile_width, width - col);
186                memcpy(dst2, src2, columns * sizeof(uint64_t));
187                dst2 += tile_width;
188                src2 += src_pitch;
189            }
190        }
191    }
192}
193
/*
 * Copy a linear image of 128-bit elements into its tiled form.
 *
 * The micro tile is a single element, so every image row is transferred
 * as one contiguous run of `width` elements.
 *
 * src        linear source image
 * src_pitch  distance between source rows, in 16-byte elements
 * dst        destination image
 * dst_pitch  distance between destination rows, in 16-byte elements
 * width      elements copied per row
 * height     number of rows copied
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void * is not standard C. */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    /*
     * Exactly one memcpy per row.  Stepping the row pointers more than
     * `height` times would run past the end of both images.
     */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}
211
212void tile_image(const void * src, unsigned src_pitch,
213                void *dst, unsigned dst_pitch,
214                mesa_format format, unsigned width, unsigned height)
215{
216    assert(src_pitch >= width);
217    assert(dst_pitch >= width);
218
219    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
220                 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
221                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
222
223    switch (_mesa_get_format_bytes(format))
224    {
225        case 16:
226            micro_tile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
227            break;
228        case 8:
229            micro_tile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
230            break;
231        case 4:
232            micro_tile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
233            break;
234        case 2:
235            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
236            {
237                micro_tile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
238            }
239            else
240            {
241                micro_tile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
242            }
243            break;
244        case 1:
245            micro_tile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
246            break;
247        default:
248            assert(0);
249            break;
250    }
251}
252
253static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
254                                    void * const dst, unsigned dst_pitch,
255                                    unsigned width, unsigned height)
256{
257    unsigned row; /* current destination row */
258    unsigned col; /* current destination column */
259    unsigned k; /* current tile number */
260    const unsigned tile_width = 8, tile_height = 4;
261    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
262
263    assert(src_pitch % tile_width == 0);
264
265    k = 0;
266    for (row = 0; row < height; row += tile_height)
267    {
268        for (col = 0; col < width; col += tile_width, ++k)
269        {
270            uint8_t *src2 = (uint8_t *)src + row * src_pitch +
271                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
272            uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col;
273            unsigned j;
274
275            for (j = 0; j < MIN2(tile_height, height - row); ++j)
276            {
277                unsigned columns = MIN2(tile_width, width - col);
278                memcpy(dst2, src2, columns * sizeof(uint8_t));
279                dst2 += dst_pitch;
280                src2 += tile_width;
281            }
282        }
283    }
284}
285
286static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
287                                     void * const dst, unsigned dst_pitch,
288                                     unsigned width, unsigned height)
289{
290    unsigned row; /* current destination row */
291    unsigned col; /* current destination column */
292    unsigned k; /* current tile number */
293    const unsigned tile_width = 8, tile_height = 2;
294    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
295
296    assert(src_pitch % tile_width == 0);
297
298    k = 0;
299    for (row = 0; row < height; row += tile_height)
300    {
301        for (col = 0; col < width; col += tile_width, ++k)
302        {
303            uint16_t *src2 = (uint16_t *)src + row * src_pitch +
304                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
305            uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
306            unsigned j;
307
308            for (j = 0; j < MIN2(tile_height, height - row); ++j)
309            {
310                unsigned columns = MIN2(tile_width, width - col);
311                memcpy(dst2, src2, columns * sizeof(uint16_t));
312                dst2 += dst_pitch;
313                src2 += tile_width;
314            }
315        }
316    }
317}
318
319static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
320                                     void * const dst, unsigned dst_pitch,
321                                     unsigned width, unsigned height)
322{
323    unsigned row; /* current destination row */
324    unsigned col; /* current destination column */
325    unsigned k; /* current tile number */
326    const unsigned tile_width = 4, tile_height = 4;
327    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
328
329    assert(src_pitch % tile_width == 0);
330
331    k = 0;
332    for (row = 0; row < height; row += tile_height)
333    {
334        for (col = 0; col < width; col += tile_width, ++k)
335        {
336            uint16_t *src2 = (uint16_t *)src + row * src_pitch +
337                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
338            uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
339            unsigned j;
340
341            for (j = 0; j < MIN2(tile_height, height - row); ++j)
342            {
343                unsigned columns = MIN2(tile_width, width - col);
344                memcpy(dst2, src2, columns * sizeof(uint16_t));
345                dst2 += dst_pitch;
346                src2 += tile_width;
347            }
348        }
349    }
350}
351
352static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
353                                     void * const dst, unsigned dst_pitch,
354                                     unsigned width, unsigned height)
355{
356    unsigned row; /* current destination row */
357    unsigned col; /* current destination column */
358    unsigned k; /* current tile number */
359    const unsigned tile_width = 4, tile_height = 2;
360    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
361
362    assert(src_pitch % tile_width == 0);
363
364    k = 0;
365    for (row = 0; row < height; row += tile_height)
366    {
367        for (col = 0; col < width; col += tile_width, ++k)
368        {
369            uint32_t *src2 = (uint32_t *)src + row * src_pitch +
370                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
371            uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col;
372            unsigned j;
373
374            for (j = 0; j < MIN2(tile_height, height - row); ++j)
375            {
376                unsigned columns = MIN2(tile_width, width - col);
377                memcpy(dst2, src2, columns * sizeof(uint32_t));
378                dst2 += dst_pitch;
379                src2 += tile_width;
380            }
381        }
382    }
383}
384
385static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
386                                     void * const dst, unsigned dst_pitch,
387                                     unsigned width, unsigned height)
388{
389    unsigned row; /* current destination row */
390    unsigned col; /* current destination column */
391    unsigned k; /* current tile number */
392    const unsigned tile_width = 2, tile_height = 2;
393    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
394
395    assert(src_pitch % tile_width == 0);
396
397    k = 0;
398    for (row = 0; row < height; row += tile_height)
399    {
400        for (col = 0; col < width; col += tile_width, ++k)
401        {
402            uint64_t *src2 = (uint64_t *)src + row * src_pitch +
403                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
404            uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col;
405            unsigned j;
406
407            for (j = 0; j < MIN2(tile_height, height - row); ++j)
408            {
409                unsigned columns = MIN2(tile_width, width - col);
410                memcpy(dst2, src2, columns * sizeof(uint64_t));
411                dst2 += dst_pitch;
412                src2 += tile_width;
413            }
414        }
415    }
416}
417
/*
 * Copy an image of 128-bit elements from its tiled form to linear layout.
 *
 * The micro tile is a single element, so every image row is transferred
 * as one contiguous run of `width` elements.
 *
 * src        source image
 * src_pitch  distance between source rows, in 16-byte elements
 * dst        linear destination image
 * dst_pitch  distance between destination rows, in 16-byte elements
 * width      elements copied per row
 * height     number of rows copied
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void * is not standard C. */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    /*
     * Exactly one memcpy per row.  Stepping the row pointers more than
     * `height` times would run past the end of both images.
     */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}
435
436void untile_image(const void * src, unsigned src_pitch,
437                  void *dst, unsigned dst_pitch,
438                  mesa_format format, unsigned width, unsigned height)
439{
440    assert(src_pitch >= width);
441    assert(dst_pitch >= width);
442
443    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
444                 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
445                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
446
447    switch (_mesa_get_format_bytes(format))
448    {
449        case 16:
450            micro_untile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
451            break;
452        case 8:
453            micro_untile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
454            break;
455        case 4:
456            micro_untile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
457            break;
458        case 2:
459            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
460            {
461                micro_untile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
462            }
463            else
464            {
465                micro_untile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
466            }
467            break;
468        case 1:
469            micro_untile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
470            break;
471        default:
472            assert(0);
473            break;
474    }
475}
476
477void get_tile_size(mesa_format format, unsigned *block_width, unsigned *block_height)
478{
479    switch (_mesa_get_format_bytes(format))
480    {
481        case 16:
482            *block_width = 1;
483            *block_height = 1;
484            break;
485        case 8:
486            *block_width = 2;
487            *block_height = 2;
488            break;
489        case 4:
490            *block_width = 4;
491            *block_height = 2;
492            break;
493        case 2:
494            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
495            {
496                *block_width = 4;
497                *block_height = 4;
498            }
499            else
500            {
501                *block_width = 8;
502                *block_height = 2;
503            }
504            break;
505        case 1:
506            *block_width = 8;
507            *block_height = 4;
508            break;
509        default:
510            assert(0);
511            break;
512    }
513}
514