1/*
2 * Copyright © 2015-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "util/u_format.h"
25#include "util/u_surface.h"
26#include "util/u_blitter.h"
27#include "v3d_context.h"
28#include "v3d_tiling.h"
29
30#if 0
31static struct pipe_surface *
32v3d_get_blit_surface(struct pipe_context *pctx,
33                     struct pipe_resource *prsc, unsigned level)
34{
35        struct pipe_surface tmpl;
36
37        memset(&tmpl, 0, sizeof(tmpl));
38        tmpl.format = prsc->format;
39        tmpl.u.tex.level = level;
40        tmpl.u.tex.first_layer = 0;
41        tmpl.u.tex.last_layer = 0;
42
43        return pctx->create_surface(pctx, prsc, &tmpl);
44}
45
/* Reports whether size has any of the bits below tile_size set, i.e. whether
 * it is not a multiple of tile_size.  The mask trick only gives a meaningful
 * answer for power-of-two tile sizes.
 */
static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        const unsigned low_bits_mask = tile_size - 1;

        return (size & low_bits_mask) != 0;
}
51
/* Tries to perform a color blit purely with a job submit: the source surface
 * is attached as the job's color_read and the destination as its color
 * buffer, with no draw call emitted here.
 *
 * Returns false before touching any state when the blit does not satisfy the
 * restrictions checked below, and true once the job has been submitted.
 *
 * This function sits inside an "#if 0" region and is currently compiled out.
 */
static bool
v3d_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
{
        struct v3d_context *v3d = v3d_context(pctx);
        /* Either side being multisampled selects the 32x32 tile size below
         * instead of 64x64.
         */
        bool msaa = (info->src.resource->nr_samples > 1 ||
                     info->dst.resource->nr_samples > 1);
        int tile_width = msaa ? 32 : 64;
        int tile_height = msaa ? 32 : 64;

        /* Depth/stencil destinations are not handled by this path. */
        if (util_format_is_depth_or_stencil(info->dst.resource->format))
                return false;

        if (info->scissor_enable)
                return false;

        /* Nothing to do here unless at least one color channel is written. */
        if ((info->mask & PIPE_MASK_RGBA) == 0)
                return false;

        /* Source and destination boxes must match exactly in x/y/width/height:
         * no scaling and no translation.
         */
        if (info->dst.box.x != info->src.box.x ||
            info->dst.box.y != info->src.box.y ||
            info->dst.box.width != info->src.box.width ||
            info->dst.box.height != info->src.box.height) {
                return false;
        }

        /* The box origin must be tile aligned.  An unaligned width/height is
         * only accepted when the box ends exactly at the right/bottom edge
         * of the destination miplevel.
         */
        int dst_surface_width = u_minify(info->dst.resource->width0,
                                         info->dst.level);
        int dst_surface_height = u_minify(info->dst.resource->height0,
                                         info->dst.level);
        if (is_tile_unaligned(info->dst.box.x, tile_width) ||
            is_tile_unaligned(info->dst.box.y, tile_height) ||
            (is_tile_unaligned(info->dst.box.width, tile_width) &&
             info->dst.box.x + info->dst.box.width != dst_surface_width) ||
            (is_tile_unaligned(info->dst.box.height, tile_height) &&
             info->dst.box.y + info->dst.box.height != dst_surface_height)) {
                return false;
        }

        /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the
         * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our
         * destination surface) to determine the stride.  This may be wrong
         * when reading from texture miplevels > 0, which are stored in
         * POT-sized areas.  For MSAA, the tile addresses are computed
         * explicitly by the RCL, but still use the destination width to
         * determine the stride (which could be fixed by explicitly supplying
         * it in the ABI).
         */
        struct v3d_resource *rsc = v3d_resource(info->src.resource);

        /* Stride the load would assume, derived from the destination width;
         * it has to equal the source slice's real stride.
         */
        uint32_t stride;

        if (info->src.resource->nr_samples > 1)
                stride = align(dst_surface_width, 32) * 4 * rsc->cpp;
        /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T)
           stride = align(dst_surface_width * rsc->cpp, 128); */
        else
                stride = align(dst_surface_width * rsc->cpp, 16);

        if (stride != rsc->slices[info->src.level].stride)
                return false;

        /* No format conversion on this path. */
        if (info->dst.resource->format != info->src.resource->format)
                return false;

        /* Debug trace, disabled by the constant condition. */
        if (false) {
                fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
                        info->src.box.x,
                        info->src.box.y,
                        info->dst.box.x,
                        info->dst.box.y,
                        info->dst.box.width,
                        info->dst.box.height);
        }

        struct pipe_surface *dst_surf =
                v3d_get_blit_surface(pctx, info->dst.resource, info->dst.level);
        struct pipe_surface *src_surf =
                v3d_get_blit_surface(pctx, info->src.resource, info->src.level);

        v3d_flush_jobs_reading_resource(v3d, info->src.resource);

        /* Job targeting dst_surf as color buffer (no second surface), with
         * the source attached as the surface to read from.
         */
        struct v3d_job *job = v3d_get_job(v3d, dst_surf, NULL);
        pipe_surface_reference(&job->color_read, src_surf);

        /* If we're resolving from MSAA to single sample, we still need to run
         * the engine in MSAA mode for the load.
         */
        /* NOTE(review): the three fields written in this branch are assigned
         * again, unconditionally, further down (tile_width, tile_height,
         * msaa), so this branch has no lasting effect.
         */
        if (!job->msaa && info->src.resource->nr_samples > 1) {
                job->msaa = true;
                job->tile_width = 32;
                job->tile_height = 32;
        }

        /* Restrict the job to the destination box. */
        job->draw_min_x = info->dst.box.x;
        job->draw_min_y = info->dst.box.y;
        job->draw_max_x = info->dst.box.x + info->dst.box.width;
        job->draw_max_y = info->dst.box.y + info->dst.box.height;
        job->draw_width = dst_surf->width;
        job->draw_height = dst_surf->height;

        job->tile_width = tile_width;
        job->tile_height = tile_height;
        job->msaa = msaa;
        job->needs_flush = true;
        job->resolve |= PIPE_CLEAR_COLOR;

        v3d_job_submit(v3d, job);

        /* Drop the local surface references taken above. */
        pipe_surface_reference(&dst_surf, NULL);
        pipe_surface_reference(&src_surf, NULL);

        return true;
}
165#endif
166
167void
168v3d_blitter_save(struct v3d_context *v3d)
169{
170        util_blitter_save_fragment_constant_buffer_slot(v3d->blitter,
171                                                        v3d->constbuf[PIPE_SHADER_FRAGMENT].cb);
172        util_blitter_save_vertex_buffer_slot(v3d->blitter, v3d->vertexbuf.vb);
173        util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx);
174        util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs);
175        util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
176                                     v3d->streamout.targets);
177        util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer);
178        util_blitter_save_viewport(v3d->blitter, &v3d->viewport);
179        util_blitter_save_scissor(v3d->blitter, &v3d->scissor);
180        util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs);
181        util_blitter_save_blend(v3d->blitter, v3d->blend);
182        util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa);
183        util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref);
184        util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask);
185        util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer);
186        util_blitter_save_fragment_sampler_states(v3d->blitter,
187                        v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers,
188                        (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers);
189        util_blitter_save_fragment_sampler_views(v3d->blitter,
190                        v3d->tex[PIPE_SHADER_FRAGMENT].num_textures,
191                        v3d->tex[PIPE_SHADER_FRAGMENT].textures);
192        util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
193                                     v3d->streamout.targets);
194}
195
196static bool
197v3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
198{
199        struct v3d_context *v3d = v3d_context(ctx);
200        struct v3d_resource *src = v3d_resource(info->src.resource);
201        struct pipe_resource *tiled = NULL;
202
203        if (!src->tiled) {
204                struct pipe_box box = {
205                        .x = 0,
206                        .y = 0,
207                        .width = u_minify(info->src.resource->width0,
208                                           info->src.level),
209                        .height = u_minify(info->src.resource->height0,
210                                           info->src.level),
211                        .depth = 1,
212                };
213                struct pipe_resource tmpl = {
214                        .target = info->src.resource->target,
215                        .format = info->src.resource->format,
216                        .width0 = box.width,
217                        .height0 = box.height,
218                        .depth0 = 1,
219                        .array_size = 1,
220                };
221                tiled = ctx->screen->resource_create(ctx->screen, &tmpl);
222                if (!tiled) {
223                        fprintf(stderr, "Failed to create tiled blit temp\n");
224                        return false;
225                }
226                ctx->resource_copy_region(ctx,
227                                          tiled, 0,
228                                          0, 0, 0,
229                                          info->src.resource, info->src.level,
230                                          &box);
231                info->src.level = 0;
232                info->src.resource = tiled;
233        }
234
235        if (!util_blitter_is_blit_supported(v3d->blitter, info)) {
236                fprintf(stderr, "blit unsupported %s -> %s\n",
237                    util_format_short_name(info->src.resource->format),
238                    util_format_short_name(info->dst.resource->format));
239                return false;
240        }
241
242        v3d_blitter_save(v3d);
243        util_blitter_blit(v3d->blitter, info);
244
245        pipe_resource_reference(&tiled, NULL);
246
247        return true;
248}
249
250/* Implement stencil blits by reinterpreting the stencil data as an RGBA8888
251 * or R8 texture.
252 */
253static void
254v3d_stencil_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
255{
256        struct v3d_context *v3d = v3d_context(ctx);
257        struct v3d_resource *src = v3d_resource(info->src.resource);
258        struct v3d_resource *dst = v3d_resource(info->dst.resource);
259        enum pipe_format src_format, dst_format;
260
261        if (src->separate_stencil) {
262                src = src->separate_stencil;
263                src_format = PIPE_FORMAT_R8_UNORM;
264        } else {
265                src_format = PIPE_FORMAT_RGBA8888_UNORM;
266        }
267
268        if (dst->separate_stencil) {
269                dst = dst->separate_stencil;
270                dst_format = PIPE_FORMAT_R8_UNORM;
271        } else {
272                dst_format = PIPE_FORMAT_RGBA8888_UNORM;
273        }
274
275        /* Initialize the surface. */
276        struct pipe_surface dst_tmpl = {
277                .u.tex = {
278                        .level = info->dst.level,
279                        .first_layer = info->dst.box.z,
280                        .last_layer = info->dst.box.z,
281                },
282                .format = dst_format,
283        };
284        struct pipe_surface *dst_surf =
285                ctx->create_surface(ctx, &dst->base, &dst_tmpl);
286
287        /* Initialize the sampler view. */
288        struct pipe_sampler_view src_tmpl = {
289                .target = src->base.target,
290                .format = src_format,
291                .u.tex = {
292                        .first_level = info->src.level,
293                        .last_level = info->src.level,
294                        .first_layer = 0,
295                        .last_layer = (PIPE_TEXTURE_3D ?
296                                       u_minify(src->base.depth0,
297                                                info->src.level) - 1 :
298                                       src->base.array_size - 1),
299                },
300                .swizzle_r = PIPE_SWIZZLE_X,
301                .swizzle_g = PIPE_SWIZZLE_Y,
302                .swizzle_b = PIPE_SWIZZLE_Z,
303                .swizzle_a = PIPE_SWIZZLE_W,
304        };
305        struct pipe_sampler_view *src_view =
306                ctx->create_sampler_view(ctx, &src->base, &src_tmpl);
307
308        v3d_blitter_save(v3d);
309        util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box,
310                                  src_view, &info->src.box,
311                                  src->base.width0, src->base.height0,
312                                  PIPE_MASK_R,
313                                  PIPE_TEX_FILTER_NEAREST,
314                                  info->scissor_enable ? &info->scissor : NULL,
315                                  info->alpha_blend);
316
317        pipe_surface_reference(&dst_surf, NULL);
318        pipe_sampler_view_reference(&src_view, NULL);
319}
320
/* Bits and fields of the "ioa" word of a TFU submission; v3d_tfu() ORs them
 * into the destination offset.
 */

/* Disable level 0 write, just write following mipmaps */
#define V3D_TFU_IOA_DIMTW (1 << 0)

/* Destination tiling format field and its values. */
#define V3D_TFU_IOA_FORMAT_SHIFT 3
#define V3D_TFU_IOA_FORMAT_LINEARTILE 3
#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4
#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5
#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6
#define V3D_TFU_IOA_FORMAT_UIF_XOR 7

/* Field positions within the "icfg" word of a TFU submission, lowest bit
 * first.  v3d_tfu() stores last_level - base_level in NUMMM, the texture
 * format in TTYPE, the source tiling format in FORMAT and the extra
 * destination UIF padding in OPAD.
 */
#define V3D_TFU_ICFG_NUMMM_SHIFT 5
#define V3D_TFU_ICFG_TTYPE_SHIFT 9
#define V3D_TFU_ICFG_FORMAT_SHIFT 18
#define V3D_TFU_ICFG_OPAD_SHIFT 22

/* Values for the icfg FORMAT field. */
#define V3D_TFU_ICFG_FORMAT_RASTER 0
#define V3D_TFU_ICFG_FORMAT_SAND_128 1
#define V3D_TFU_ICFG_FORMAT_SAND_256 2
#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13
#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14
#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15
344
345static bool
346v3d_tfu(struct pipe_context *pctx,
347        struct pipe_resource *pdst,
348        struct pipe_resource *psrc,
349        unsigned int src_level,
350        unsigned int base_level,
351        unsigned int last_level,
352        unsigned int src_layer,
353        unsigned int dst_layer)
354{
355        struct v3d_context *v3d = v3d_context(pctx);
356        struct v3d_screen *screen = v3d->screen;
357        struct v3d_resource *src = v3d_resource(psrc);
358        struct v3d_resource *dst = v3d_resource(pdst);
359        struct v3d_resource_slice *src_base_slice = &src->slices[src_level];
360        struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level];
361        int msaa_scale = pdst->nr_samples > 1 ? 2 : 1;
362        int width = u_minify(pdst->width0, base_level) * msaa_scale;
363        int height = u_minify(pdst->height0, base_level) * msaa_scale;
364
365        if (psrc->format != pdst->format)
366                return false;
367        if (psrc->nr_samples != pdst->nr_samples)
368                return false;
369
370        uint32_t tex_format = v3d_get_tex_format(&screen->devinfo,
371                                                 pdst->format);
372
373        if (!v3d_tfu_supports_tex_format(&screen->devinfo, tex_format))
374                return false;
375
376        if (pdst->target != PIPE_TEXTURE_2D || psrc->target != PIPE_TEXTURE_2D)
377                return false;
378
379        /* Can't write to raster. */
380        if (dst_base_slice->tiling == VC5_TILING_RASTER)
381                return false;
382
383        v3d_flush_jobs_writing_resource(v3d, psrc);
384        v3d_flush_jobs_reading_resource(v3d, pdst);
385
386        struct drm_v3d_submit_tfu tfu = {
387                .ios = (height << 16) | width,
388                .bo_handles = {
389                        dst->bo->handle,
390                        src != dst ? src->bo->handle : 0
391                },
392                .in_sync = v3d->out_sync,
393                .out_sync = v3d->out_sync,
394        };
395        uint32_t src_offset = (src->bo->offset +
396                               v3d_layer_offset(psrc, src_level, src_layer));
397        tfu.iia |= src_offset;
398        if (src_base_slice->tiling == VC5_TILING_RASTER) {
399                tfu.icfg |= (V3D_TFU_ICFG_FORMAT_RASTER <<
400                             V3D_TFU_ICFG_FORMAT_SHIFT);
401        } else {
402                tfu.icfg |= ((V3D_TFU_ICFG_FORMAT_LINEARTILE +
403                              (src_base_slice->tiling - VC5_TILING_LINEARTILE)) <<
404                             V3D_TFU_ICFG_FORMAT_SHIFT);
405        }
406
407        uint32_t dst_offset = (dst->bo->offset +
408                               v3d_layer_offset(pdst, src_level, dst_layer));
409        tfu.ioa |= dst_offset;
410        if (last_level != base_level)
411                tfu.ioa |= V3D_TFU_IOA_DIMTW;
412        tfu.ioa |= ((V3D_TFU_IOA_FORMAT_LINEARTILE +
413                     (dst_base_slice->tiling - VC5_TILING_LINEARTILE)) <<
414                    V3D_TFU_IOA_FORMAT_SHIFT);
415
416        tfu.icfg |= tex_format << V3D_TFU_ICFG_TTYPE_SHIFT;
417        tfu.icfg |= (last_level - base_level) << V3D_TFU_ICFG_NUMMM_SHIFT;
418
419        switch (src_base_slice->tiling) {
420        case VC5_TILING_UIF_NO_XOR:
421        case VC5_TILING_UIF_XOR:
422                tfu.iis |= (src_base_slice->padded_height /
423                            (2 * v3d_utile_height(src->cpp)));
424                break;
425        case VC5_TILING_RASTER:
426                tfu.iis |= src_base_slice->stride / src->cpp;
427                break;
428        case VC5_TILING_LINEARTILE:
429        case VC5_TILING_UBLINEAR_1_COLUMN:
430        case VC5_TILING_UBLINEAR_2_COLUMN:
431                break;
432       }
433
434        /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
435         * OPAD field for the destination (how many extra UIF blocks beyond
436         * those necessary to cover the height).  When filling mipmaps, the
437         * miplevel 1+ tiling state is inferred.
438         */
439        if (dst_base_slice->tiling == VC5_TILING_UIF_NO_XOR ||
440            dst_base_slice->tiling == VC5_TILING_UIF_XOR) {
441                int uif_block_h = 2 * v3d_utile_height(dst->cpp);
442                int implicit_padded_height = align(height, uif_block_h);
443
444                tfu.icfg |= (((dst_base_slice->padded_height -
445                               implicit_padded_height) / uif_block_h) <<
446                             V3D_TFU_ICFG_OPAD_SHIFT);
447        }
448
449        int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu);
450        if (ret != 0) {
451                fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
452                return false;
453        }
454
455        dst->writes++;
456
457        return true;
458}
459
460boolean
461v3d_generate_mipmap(struct pipe_context *pctx,
462                    struct pipe_resource *prsc,
463                    enum pipe_format format,
464                    unsigned int base_level,
465                    unsigned int last_level,
466                    unsigned int first_layer,
467                    unsigned int last_layer)
468{
469        if (format != prsc->format)
470                return false;
471
472        /* We could maybe support looping over layers for array textures, but
473         * we definitely don't support 3D.
474         */
475        if (first_layer != last_layer)
476                return false;
477
478        return v3d_tfu(pctx,
479                       prsc, prsc,
480                       base_level,
481                       base_level, last_level,
482                       first_layer, first_layer);
483}
484
485static bool
486v3d_tfu_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
487{
488        int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
489        int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
490
491        if ((info->mask & PIPE_MASK_RGBA) == 0)
492                return false;
493
494        if (info->scissor_enable ||
495            info->dst.box.x != 0 ||
496            info->dst.box.y != 0 ||
497            info->dst.box.width != dst_width ||
498            info->dst.box.height != dst_height ||
499            info->src.box.x != 0 ||
500            info->src.box.y != 0 ||
501            info->src.box.width != info->dst.box.width ||
502            info->src.box.height != info->dst.box.height) {
503                return false;
504        }
505
506        if (info->dst.format != info->src.format)
507                return false;
508
509        return v3d_tfu(pctx, info->dst.resource, info->src.resource,
510                       info->src.level,
511                       info->dst.level, info->dst.level,
512                       info->src.box.z, info->dst.box.z);
513}
514
515/* Optimal hardware path for blitting pixels.
516 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
517 */
518void
519v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
520{
521        struct v3d_context *v3d = v3d_context(pctx);
522        struct pipe_blit_info info = *blit_info;
523
524        if (info.mask & PIPE_MASK_S) {
525                v3d_stencil_blit(pctx, blit_info);
526                info.mask &= ~PIPE_MASK_S;
527        }
528
529        if (v3d_tfu_blit(pctx, blit_info))
530                info.mask &= ~PIPE_MASK_RGBA;
531
532        if (info.mask)
533                v3d_render_blit(pctx, &info);
534
535        /* Flush our blit jobs immediately.  They're unlikely to get reused by
536         * normal drawing or other blits, and without flushing we can easily
537         * run into unexpected OOMs when blits are used for a large series of
538         * texture uploads before using the textures.
539         */
540        v3d_flush_jobs_writing_resource(v3d, info.dst.resource);
541}
542