v3d_tiling.c revision b8e80941
1/*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** @file v3d_tiling.c
25 *
26 * Handles information about the VC5 tiling formats, and loading and storing
27 * from them.
28 */
29
30#include <stdint.h>
31#include "v3d_screen.h"
32#include "v3d_context.h"
33#include "v3d_tiling.h"
34#include "broadcom/common/v3d_cpu_tiling.h"
35
/**
 * Returns the width, in pixels, of a 64-byte microtile (utile) for the given
 * number of bytes per pixel.
 *
 * Only cpp values of 1, 2, 4, 8 and 16 are handled; any other value reaches
 * unreachable().
 */
uint32_t
v3d_utile_width(int cpp)
{
        if (cpp == 1 || cpp == 2)
                return 8;

        if (cpp == 4 || cpp == 8)
                return 4;

        if (cpp == 16)
                return 2;

        unreachable("unknown cpp");
}
53
/**
 * Returns the height, in pixels, of a 64-byte microtile (utile) for the given
 * number of bytes per pixel.
 *
 * Only cpp values of 1, 2, 4, 8 and 16 are handled; any other value reaches
 * unreachable().
 */
uint32_t
v3d_utile_height(int cpp)
{
        if (cpp == 1)
                return 8;

        if (cpp == 2 || cpp == 4)
                return 4;

        if (cpp == 8 || cpp == 16)
                return 2;

        unreachable("unknown cpp");
}
71
/**
 * Returns the byte offset of pixel (x, y) from the start of its utile.
 *
 * Pixels inside a utile are stored in raster order: each row holds
 * v3d_utile_width(cpp) pixels of cpp bytes.  \p x and \p y are coordinates
 * within the utile and are asserted to lie inside it.
 */
static inline uint32_t
v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
{
        uint32_t row_pixels = v3d_utile_width(cpp);

        assert(x < row_pixels && y < v3d_utile_height(cpp));

        /* Raster index of the pixel within the utile, scaled to bytes. */
        return (y * row_pixels + x) * cpp;
}
87
/**
 * Returns the byte offset of pixel (x, y) in a LINEARTILE layout.
 *
 * LINEARTILE is a single line of utiles running in either the X or the Y
 * direction, so the utile index is simply the sum of the utile column and
 * the utile row (one of which is asserted to be zero).
 *
 * \p image_h is not used; it is part of the signature so this function can
 * be passed as a get_pixel_offset callback.
 */
static inline uint32_t
v3d_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
{
        uint32_t w = v3d_utile_width(cpp);
        uint32_t h = v3d_utile_height(cpp);
        uint32_t utile_col = x / w;
        uint32_t utile_row = y / h;

        assert(utile_col == 0 || utile_row == 0);

        /* Each utile occupies 64 bytes. */
        uint32_t utile_base = 64 * (utile_col + utile_row);
        uint32_t within_utile = v3d_get_utile_pixel_offset(cpp,
                                                           x & (w - 1),
                                                           y & (h - 1));

        return utile_base + within_utile;
}
108
/**
 * Returns the byte offset of pixel (x, y) in a UBLINEAR layout.
 *
 * Pixels are grouped into UIF blocks (2x2 utiles, 256 bytes) and the blocks
 * are stored in raster order, \p ublinear_number blocks per row.  Inside a
 * block the right-hand utiles sit 64 bytes after the left-hand ones and the
 * bottom utiles 128 bytes after the top ones.
 */
static inline uint32_t
v3d_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
                              int ublinear_number)
{
        uint32_t utile_w = v3d_utile_width(cpp);
        uint32_t utile_h = v3d_utile_height(cpp);

        /* Which UIF block the pixel falls in. */
        uint32_t block_col = x / (utile_w * 2);
        uint32_t block_row = y / (utile_h * 2);

        uint32_t offset = 256 * (block_row * ublinear_number + block_col);

        /* Which of the block's four utiles the pixel falls in. */
        if (x & utile_w)
                offset += 64;
        if (y & utile_h)
                offset += 128;

        offset += v3d_get_utile_pixel_offset(cpp,
                                             x & (utile_w - 1),
                                             y & (utile_h - 1));

        return offset;
}
134
/**
 * UBLINEAR pixel offset with two UIF blocks per row.
 *
 * \p image_h is not used; it is part of the signature so this function can
 * be passed as a get_pixel_offset callback.
 */
static inline uint32_t
v3d_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        const int blocks_per_row = 2;

        return v3d_get_ublinear_pixel_offset(cpp, x, y, blocks_per_row);
}
141
/**
 * UBLINEAR pixel offset with a single UIF block per row.
 *
 * \p image_h is not used; it is part of the signature so this function can
 * be passed as a get_pixel_offset callback.
 */
static inline uint32_t
v3d_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        const int blocks_per_row = 1;

        return v3d_get_ublinear_pixel_offset(cpp, x, y, blocks_per_row);
}
148
/**
 * Returns the byte offset of pixel (x, y) in a UIF layout.
 *
 * Pixels are stored in UIF blocks (2x2 utiles, 256 bytes each).  Blocks are
 * arranged in columns four blocks wide: within a column the blocks are in
 * raster order, and each column is followed in memory by the next one.
 * \p image_h, rounded up to a whole number of blocks, gives the height of a
 * column.
 *
 * When \p do_xor is set, pixels in odd-numbered columns have bit 4 of their
 * block row toggled before the address is computed.
 */
static inline uint32_t
v3d_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y,
                         bool do_xor)
{
        uint32_t utile_w = v3d_utile_width(cpp);
        uint32_t utile_h = v3d_utile_height(cpp);

        /* UIF block dimensions are powers of two, so work with shifts. */
        uint32_t block_w_shift = ffs(utile_w * 2) - 1;
        uint32_t block_h_shift = ffs(utile_h * 2) - 1;

        /* Block coordinates, and the pixel's position inside its block. */
        uint32_t block_x = x >> block_w_shift;
        uint32_t block_y = y >> block_h_shift;
        uint32_t in_block_x = x - (block_x << block_w_shift);
        uint32_t in_block_y = y - (block_y << block_h_shift);

        uint32_t column = block_x / 4;

        if (do_xor && (column & 1))
                block_y ^= 0x10;

        /* Image height in blocks, rounded up. */
        uint32_t blocks_high =
                align(image_h, 1 << block_h_shift) >> block_h_shift;
        uint32_t block_index = (column * ((blocks_high - 1) * 4) +
                                block_x + block_y * 4);

        uint32_t offset = block_index * 256;

        /* Docs have this in pixels, we do bytes here: the bottom utiles of
         * a block start 128 bytes in, the right-hand ones another 64.
         */
        if (in_block_y >= utile_h)
                offset += 128;
        if (in_block_x >= utile_w)
                offset += 64;

        return offset + v3d_get_utile_pixel_offset(cpp,
                                                   in_block_x & (utile_w - 1),
                                                   in_block_y & (utile_h - 1));
}
199
/** UIF pixel offset with the XOR addressing step enabled. */
static inline uint32_t
v3d_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                             uint32_t x, uint32_t y)
{
        const bool do_xor = true;

        return v3d_get_uif_pixel_offset(cpp, image_h, x, y, do_xor);
}
206
/** UIF pixel offset with the XOR addressing step disabled. */
static inline uint32_t
v3d_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                                uint32_t x, uint32_t y)
{
        const bool do_xor = false;

        return v3d_get_uif_pixel_offset(cpp, image_h, x, y, do_xor);
}
213
214/* Loads/stores non-utile-aligned boxes by walking over the destination
215 * rectangle, computing the address on the GPU, and storing/loading a pixel at
216 * a time.
217 */
218static inline void
219v3d_move_pixels_unaligned(void *gpu, uint32_t gpu_stride,
220                          void *cpu, uint32_t cpu_stride,
221                          int cpp, uint32_t image_h,
222                          const struct pipe_box *box,
223                          uint32_t (*get_pixel_offset)(uint32_t cpp,
224                                                       uint32_t image_h,
225                                                       uint32_t x, uint32_t y),
226                          bool is_load)
227{
228        for (uint32_t y = 0; y < box->height; y++) {
229                void *cpu_row = cpu + y * cpu_stride;
230
231                for (int x = 0; x < box->width; x++) {
232                        uint32_t pixel_offset = get_pixel_offset(cpp, image_h,
233                                                                 box->x + x,
234                                                                 box->y + y);
235
236                        if (false) {
237                                fprintf(stderr, "%3d,%3d -> %d\n",
238                                        box->x + x, box->y + y,
239                                        pixel_offset);
240                        }
241
242                        if (is_load) {
243                                memcpy(cpu_row + x * cpp,
244                                       gpu + pixel_offset,
245                                       cpp);
246                        } else {
247                                memcpy(gpu + pixel_offset,
248                                       cpu_row + x * cpp,
249                                       cpp);
250                        }
251                }
252        }
253}
254
255/* Breaks the image down into utiles and calls either the fast whole-utile
256 * load/store functions, or the unaligned fallback case.
257 */
258static inline void
259v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
260                               void *cpu, uint32_t cpu_stride,
261                               int cpp, uint32_t image_h,
262                               const struct pipe_box *box,
263                               uint32_t (*get_pixel_offset)(uint32_t cpp,
264                                                            uint32_t image_h,
265                                                            uint32_t x, uint32_t y),
266                               bool is_load)
267{
268        uint32_t utile_w = v3d_utile_width(cpp);
269        uint32_t utile_h = v3d_utile_height(cpp);
270        uint32_t utile_gpu_stride = utile_w * cpp;
271        uint32_t x1 = box->x;
272        uint32_t y1 = box->y;
273        uint32_t x2 = box->x + box->width;
274        uint32_t y2 = box->y + box->height;
275        uint32_t align_x1 = align(x1, utile_w);
276        uint32_t align_y1 = align(y1, utile_h);
277        uint32_t align_x2 = x2 & ~(utile_w - 1);
278        uint32_t align_y2 = y2 & ~(utile_h - 1);
279
280        /* Load/store all the whole utiles first. */
281        for (uint32_t y = align_y1; y < align_y2; y += utile_h) {
282                void *cpu_row = cpu + (y - box->y) * cpu_stride;
283
284                for (uint32_t x = align_x1; x < align_x2; x += utile_w) {
285                        void *utile_gpu = (gpu +
286                                           get_pixel_offset(cpp, image_h, x, y));
287                        void *utile_cpu = cpu_row + (x - box->x) * cpp;
288
289                        if (is_load) {
290                                v3d_load_utile(utile_cpu, cpu_stride,
291                                               utile_gpu, utile_gpu_stride);
292                        } else {
293                                v3d_store_utile(utile_gpu, utile_gpu_stride,
294                                                utile_cpu, cpu_stride);
295                        }
296                }
297        }
298
299        /* If there were no aligned utiles in the middle, load/store the whole
300         * thing unaligned.
301         */
302        if (align_y2 <= align_y1 ||
303            align_x2 <= align_x1) {
304                v3d_move_pixels_unaligned(gpu, gpu_stride,
305                                          cpu, cpu_stride,
306                                          cpp, image_h,
307                                          box,
308                                          get_pixel_offset, is_load);
309                return;
310        }
311
312        /* Load/store the partial utiles. */
313        struct pipe_box partial_boxes[4] = {
314                /* Top */
315                {
316                        .x = x1,
317                        .width = x2 - x1,
318                        .y = y1,
319                        .height = align_y1 - y1,
320                },
321                /* Bottom */
322                {
323                        .x = x1,
324                        .width = x2 - x1,
325                        .y = align_y2,
326                        .height = y2 - align_y2,
327                },
328                /* Left */
329                {
330                        .x = x1,
331                        .width = align_x1 - x1,
332                        .y = align_y1,
333                        .height = align_y2 - align_y1,
334                },
335                /* Right */
336                {
337                        .x = align_x2,
338                        .width = x2 - align_x2,
339                        .y = align_y1,
340                        .height = align_y2 - align_y1,
341                },
342        };
343        for (int i = 0; i < ARRAY_SIZE(partial_boxes); i++) {
344                void *partial_cpu = (cpu +
345                                     (partial_boxes[i].y - y1) * cpu_stride +
346                                     (partial_boxes[i].x - x1) * cpp);
347
348                v3d_move_pixels_unaligned(gpu, gpu_stride,
349                                          partial_cpu, cpu_stride,
350                                          cpp, image_h,
351                                          &partial_boxes[i],
352                                          get_pixel_offset, is_load);
353        }
354}
355
/* Dispatches a box copy to v3d_move_pixels_general_percpp() by pixel size.
 *
 * Each case passes cpp as a literal rather than forwarding the variable.
 * NOTE(review): presumably this lets the compiler specialise the inlined
 * helper for each pixel size -- confirm before collapsing the cases.
 *
 * Supported cpp values are 1, 2, 4, 8 and 16.  Any other value is a caller
 * error and reaches unreachable(), the same way v3d_utile_width() and
 * v3d_utile_height() treat it, instead of silently copying nothing.
 *
 * The remaining parameters are forwarded unchanged.
 */
static inline void
v3d_move_pixels_general(void *gpu, uint32_t gpu_stride,
                               void *cpu, uint32_t cpu_stride,
                               int cpp, uint32_t image_h,
                               const struct pipe_box *box,
                               uint32_t (*get_pixel_offset)(uint32_t cpp,
                                                            uint32_t image_h,
                                                            uint32_t x, uint32_t y),
                               bool is_load)
{
        switch (cpp) {
        case 1:
                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               1, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 2:
                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               2, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 4:
                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               4, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 8:
                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               8, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 16:
                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               16, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        default:
                /* Previously fell out of the switch and did nothing. */
                unreachable("unknown cpp");
        }
}
404
405static inline void
406v3d_move_tiled_image(void *gpu, uint32_t gpu_stride,
407                     void *cpu, uint32_t cpu_stride,
408                     enum v3d_tiling_mode tiling_format,
409                     int cpp,
410                     uint32_t image_h,
411                     const struct pipe_box *box,
412                     bool is_load)
413{
414        switch (tiling_format) {
415        case VC5_TILING_UIF_XOR:
416                v3d_move_pixels_general(gpu, gpu_stride,
417                                        cpu, cpu_stride,
418                                        cpp, image_h, box,
419                                        v3d_get_uif_xor_pixel_offset,
420                                        is_load);
421                break;
422        case VC5_TILING_UIF_NO_XOR:
423                v3d_move_pixels_general(gpu, gpu_stride,
424                                        cpu, cpu_stride,
425                                        cpp, image_h, box,
426                                        v3d_get_uif_no_xor_pixel_offset,
427                                        is_load);
428                break;
429        case VC5_TILING_UBLINEAR_2_COLUMN:
430                v3d_move_pixels_general(gpu, gpu_stride,
431                                        cpu, cpu_stride,
432                                        cpp, image_h, box,
433                                        v3d_get_ublinear_2_column_pixel_offset,
434                                        is_load);
435                break;
436        case VC5_TILING_UBLINEAR_1_COLUMN:
437                v3d_move_pixels_general(gpu, gpu_stride,
438                                        cpu, cpu_stride,
439                                        cpp, image_h, box,
440                                        v3d_get_ublinear_1_column_pixel_offset,
441                                        is_load);
442                break;
443        case VC5_TILING_LINEARTILE:
444                v3d_move_pixels_general(gpu, gpu_stride,
445                                        cpu, cpu_stride,
446                                        cpp, image_h, box,
447                                        v3d_get_lt_pixel_offset,
448                                        is_load);
449                break;
450        default:
451                unreachable("Unsupported tiling format");
452                break;
453        }
454}
455
456/**
457 * Loads pixel data from the start (microtile-aligned) box in \p src to the
458 * start of \p dst according to the given tiling format.
459 */
460void
461v3d_load_tiled_image(void *dst, uint32_t dst_stride,
462                     void *src, uint32_t src_stride,
463                     enum v3d_tiling_mode tiling_format, int cpp,
464                     uint32_t image_h,
465                     const struct pipe_box *box)
466{
467        v3d_move_tiled_image(src, src_stride,
468                             dst, dst_stride,
469                             tiling_format,
470                             cpp,
471                             image_h,
472                             box,
473                             true);
474}
475
476/**
477 * Stores pixel data from the start of \p src into a (microtile-aligned) box in
478 * \p dst according to the given tiling format.
479 */
480void
481v3d_store_tiled_image(void *dst, uint32_t dst_stride,
482                      void *src, uint32_t src_stride,
483                      enum v3d_tiling_mode tiling_format, int cpp,
484                      uint32_t image_h,
485                      const struct pipe_box *box)
486{
487        v3d_move_tiled_image(dst, dst_stride,
488                             src, src_stride,
489                             tiling_format,
490                             cpp,
491                             image_h,
492                             box,
493                             false);
494}
495