1/*
2 * Copyright (C) 2017-2019 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#include <stdlib.h>
25#include <string.h>
26
27#include "xf86drm.h"
28#include "drm-uapi/lima_drm.h"
29
30#include "util/u_math.h"
31#include "util/ralloc.h"
32#include "util/os_time.h"
33#include "util/hash_table.h"
34#include "util/format/u_format.h"
35#include "util/u_upload_mgr.h"
36#include "util/u_inlines.h"
37
38#include "lima_screen.h"
39#include "lima_context.h"
40#include "lima_job.h"
41#include "lima_bo.h"
42#include "lima_util.h"
43#include "lima_format.h"
44#include "lima_resource.h"
45#include "lima_texture.h"
46#include "lima_fence.h"
47#include "lima_gpu.h"
48
/* Widen a pointer to the 64-bit integer form stored in the pointer fields of
 * the submit ioctl request. The conversion goes through uintptr_t, the
 * integer type intended for pointer round-trips, instead of assuming that
 * unsigned long is pointer-sized. */
#define VOID2U64(x) ((uint64_t)(uintptr_t)(x))
50
51static void
52lima_get_fb_info(struct lima_job *job)
53{
54   struct lima_context *ctx = job->ctx;
55   struct lima_job_fb_info *fb = &job->fb;
56
57   fb->width = ctx->framebuffer.base.width;
58   fb->height = ctx->framebuffer.base.height;
59
60   int width = align(fb->width, 16) >> 4;
61   int height = align(fb->height, 16) >> 4;
62
63   struct lima_screen *screen = lima_screen(ctx->base.screen);
64
65   fb->tiled_w = width;
66   fb->tiled_h = height;
67
68   fb->shift_h = 0;
69   fb->shift_w = 0;
70
71   int limit = screen->plb_max_blk;
72   while ((width * height) > limit) {
73      if (width >= height) {
74         width = (width + 1) >> 1;
75         fb->shift_w++;
76      } else {
77         height = (height + 1) >> 1;
78         fb->shift_h++;
79      }
80   }
81
82   fb->block_w = width;
83   fb->block_h = height;
84
85   fb->shift_min = MIN3(fb->shift_w, fb->shift_h, 2);
86}
87
88static struct lima_job *
89lima_job_create(struct lima_context *ctx)
90{
91   struct lima_job *s;
92
93   s = rzalloc(ctx, struct lima_job);
94   if (!s)
95      return NULL;
96
97   s->fd = lima_screen(ctx->base.screen)->fd;
98   s->ctx = ctx;
99
100   s->damage_rect.minx = s->damage_rect.miny = 0xffff;
101   s->damage_rect.maxx = s->damage_rect.maxy = 0;
102   s->draws = 0;
103
104   s->clear.depth = 0x00ffffff;
105
106   for (int i = 0; i < 2; i++) {
107      util_dynarray_init(s->gem_bos + i, s);
108      util_dynarray_init(s->bos + i, s);
109   }
110
111   util_dynarray_init(&s->vs_cmd_array, s);
112   util_dynarray_init(&s->plbu_cmd_array, s);
113   util_dynarray_init(&s->plbu_cmd_head, s);
114
115   struct lima_context_framebuffer *fb = &ctx->framebuffer;
116   pipe_surface_reference(&s->key.cbuf, fb->base.cbufs[0]);
117   pipe_surface_reference(&s->key.zsbuf, fb->base.zsbuf);
118
119   lima_get_fb_info(s);
120
121   s->dump = lima_dump_create();
122
123   return s;
124}
125
126static void
127lima_job_free(struct lima_job *job)
128{
129   struct lima_context *ctx = job->ctx;
130
131   _mesa_hash_table_remove_key(ctx->jobs, &job->key);
132
133   if (job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0))
134      _mesa_hash_table_remove_key(ctx->write_jobs, job->key.cbuf->texture);
135   if (job->key.zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)))
136      _mesa_hash_table_remove_key(ctx->write_jobs, job->key.zsbuf->texture);
137
138   pipe_surface_reference(&job->key.cbuf, NULL);
139   pipe_surface_reference(&job->key.zsbuf, NULL);
140
141   lima_dump_free(job->dump);
142   job->dump = NULL;
143
144   /* TODO: do we need a cache for job? */
145   ralloc_free(job);
146}
147
148static struct lima_job *
149_lima_job_get(struct lima_context *ctx)
150{
151   struct lima_context_framebuffer *fb = &ctx->framebuffer;
152   struct lima_job_key local_key = {
153      .cbuf = fb->base.cbufs[0],
154      .zsbuf = fb->base.zsbuf,
155   };
156
157   struct hash_entry *entry = _mesa_hash_table_search(ctx->jobs, &local_key);
158   if (entry)
159      return entry->data;
160
161   struct lima_job *job = lima_job_create(ctx);
162   if (!job)
163      return NULL;
164
165   _mesa_hash_table_insert(ctx->jobs, &job->key, job);
166
167   return job;
168}
169
170/*
171 * Note: this function can only be called in draw code path,
172 * must not exist in flush code path.
173 */
174struct lima_job *
175lima_job_get(struct lima_context *ctx)
176{
177   if (ctx->job)
178      return ctx->job;
179
180   ctx->job = _lima_job_get(ctx);
181   return ctx->job;
182}
183
184bool lima_job_add_bo(struct lima_job *job, int pipe,
185                     struct lima_bo *bo, uint32_t flags)
186{
187   util_dynarray_foreach(job->gem_bos + pipe, struct drm_lima_gem_submit_bo, gem_bo) {
188      if (bo->handle == gem_bo->handle) {
189         gem_bo->flags |= flags;
190         return true;
191      }
192   }
193
194   struct drm_lima_gem_submit_bo *job_bo =
195      util_dynarray_grow(job->gem_bos + pipe, struct drm_lima_gem_submit_bo, 1);
196   job_bo->handle = bo->handle;
197   job_bo->flags = flags;
198
199   struct lima_bo **jbo = util_dynarray_grow(job->bos + pipe, struct lima_bo *, 1);
200   *jbo = bo;
201
202   /* prevent bo from being freed when job start */
203   lima_bo_reference(bo);
204
205   return true;
206}
207
208static bool
209lima_job_start(struct lima_job *job, int pipe, void *frame, uint32_t size)
210{
211   struct lima_context *ctx = job->ctx;
212   struct drm_lima_gem_submit req = {
213      .ctx = ctx->id,
214      .pipe = pipe,
215      .nr_bos = job->gem_bos[pipe].size / sizeof(struct drm_lima_gem_submit_bo),
216      .bos = VOID2U64(util_dynarray_begin(job->gem_bos + pipe)),
217      .frame = VOID2U64(frame),
218      .frame_size = size,
219      .out_sync = ctx->out_sync[pipe],
220   };
221
222   if (ctx->in_sync_fd >= 0) {
223      int err = drmSyncobjImportSyncFile(job->fd, ctx->in_sync[pipe],
224                                         ctx->in_sync_fd);
225      if (err)
226         return false;
227
228      req.in_sync[0] = ctx->in_sync[pipe];
229      close(ctx->in_sync_fd);
230      ctx->in_sync_fd = -1;
231   }
232
233   bool ret = drmIoctl(job->fd, DRM_IOCTL_LIMA_GEM_SUBMIT, &req) == 0;
234
235   util_dynarray_foreach(job->bos + pipe, struct lima_bo *, bo) {
236      lima_bo_unreference(*bo);
237   }
238
239   return ret;
240}
241
242static bool
243lima_job_wait(struct lima_job *job, int pipe, uint64_t timeout_ns)
244{
245   int64_t abs_timeout = os_time_get_absolute_timeout(timeout_ns);
246   if (abs_timeout == OS_TIMEOUT_INFINITE)
247      abs_timeout = INT64_MAX;
248
249   struct lima_context *ctx = job->ctx;
250   return !drmSyncobjWait(job->fd, ctx->out_sync + pipe, 1, abs_timeout, 0, NULL);
251}
252
253static bool
254lima_job_has_bo(struct lima_job *job, struct lima_bo *bo, bool all)
255{
256   for (int i = 0; i < 2; i++) {
257      util_dynarray_foreach(job->gem_bos + i, struct drm_lima_gem_submit_bo, gem_bo) {
258         if (bo->handle == gem_bo->handle) {
259            if (all || gem_bo->flags & LIMA_SUBMIT_BO_WRITE)
260               return true;
261            else
262               break;
263         }
264      }
265   }
266
267   return false;
268}
269
270void *
271lima_job_create_stream_bo(struct lima_job *job, int pipe,
272                          unsigned size, uint32_t *va)
273{
274   struct lima_context *ctx = job->ctx;
275
276   void *cpu;
277   unsigned offset;
278   struct pipe_resource *pres = NULL;
279   u_upload_alloc(ctx->uploader, 0, size, 0x40, &offset, &pres, &cpu);
280
281   struct lima_resource *res = lima_resource(pres);
282   *va = res->bo->va + offset;
283
284   lima_job_add_bo(job, pipe, res->bo, LIMA_SUBMIT_BO_READ);
285
286   pipe_resource_reference(&pres, NULL);
287
288   return cpu;
289}
290
291static inline struct lima_damage_region *
292lima_job_get_damage(struct lima_job *job)
293{
294   if (!(job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0)))
295      return NULL;
296
297   struct lima_surface *surf = lima_surface(job->key.cbuf);
298   struct lima_resource *res = lima_resource(surf->base.texture);
299   return &res->damage;
300}
301
302static bool
303lima_fb_cbuf_needs_reload(struct lima_job *job)
304{
305   if (!job->key.cbuf)
306      return false;
307
308   struct lima_surface *surf = lima_surface(job->key.cbuf);
309   struct lima_resource *res = lima_resource(surf->base.texture);
310   if (res->damage.region) {
311      /* for EGL_KHR_partial_update, when EGL_EXT_buffer_age is enabled,
312       * we need to reload damage region, otherwise just want to reload
313       * the region not aligned to tile boundary */
314      //if (!res->damage.aligned)
315      //   return true;
316      return true;
317   }
318   else if (surf->reload & PIPE_CLEAR_COLOR0)
319         return true;
320
321   return false;
322}
323
324static bool
325lima_fb_zsbuf_needs_reload(struct lima_job *job)
326{
327   if (!job->key.zsbuf)
328      return false;
329
330   struct lima_surface *surf = lima_surface(job->key.zsbuf);
331   if (surf->reload & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
332         return true;
333
334   return false;
335}
336
337static void
338lima_pack_reload_plbu_cmd(struct lima_job *job, struct pipe_surface *psurf)
339{
340   #define lima_reload_render_state_offset 0x0000
341   #define lima_reload_gl_pos_offset       0x0040
342   #define lima_reload_varying_offset      0x0080
343   #define lima_reload_tex_desc_offset     0x00c0
344   #define lima_reload_tex_array_offset    0x0100
345   #define lima_reload_buffer_size         0x0140
346
347   struct lima_context *ctx = job->ctx;
348   struct lima_surface *surf = lima_surface(psurf);
349   int level = psurf->u.tex.level;
350   unsigned first_layer = psurf->u.tex.first_layer;
351
352   uint32_t va;
353   void *cpu = lima_job_create_stream_bo(
354      job, LIMA_PIPE_PP, lima_reload_buffer_size, &va);
355
356   struct lima_screen *screen = lima_screen(ctx->base.screen);
357
358   uint32_t reload_shader_first_instr_size =
359      ((uint32_t *)(screen->pp_buffer->map + pp_reload_program_offset))[0] & 0x1f;
360   uint32_t reload_shader_va = screen->pp_buffer->va + pp_reload_program_offset;
361
362   struct lima_render_state reload_render_state = {
363      .alpha_blend = 0xf03b1ad2,
364      .depth_test = 0x0000000e,
365      .depth_range = 0xffff0000,
366      .stencil_front = 0x00000007,
367      .stencil_back = 0x00000007,
368      .multi_sample = 0x0000f007,
369      .shader_address = reload_shader_va | reload_shader_first_instr_size,
370      .varying_types = 0x00000001,
371      .textures_address = va + lima_reload_tex_array_offset,
372      .aux0 = 0x00004021,
373      .varyings_address = va + lima_reload_varying_offset,
374   };
375
376   if (util_format_is_depth_or_stencil(psurf->format)) {
377      reload_render_state.alpha_blend &= 0x0fffffff;
378      if (psurf->format != PIPE_FORMAT_Z16_UNORM)
379         reload_render_state.depth_test |= 0x400;
380      if (surf->reload & PIPE_CLEAR_DEPTH)
381         reload_render_state.depth_test |= 0x801;
382      if (surf->reload & PIPE_CLEAR_STENCIL) {
383         reload_render_state.depth_test |= 0x1000;
384         reload_render_state.stencil_front = 0x0000024f;
385         reload_render_state.stencil_back = 0x0000024f;
386         reload_render_state.stencil_test = 0x0000ffff;
387      }
388   }
389
390   memcpy(cpu + lima_reload_render_state_offset, &reload_render_state,
391          sizeof(reload_render_state));
392
393   lima_tex_desc *td = cpu + lima_reload_tex_desc_offset;
394   memset(td, 0, lima_min_tex_desc_size);
395   lima_texture_desc_set_res(ctx, td, psurf->texture, level, level, first_layer);
396   td->format = lima_format_get_texel_reload(psurf->format);
397   td->unnorm_coords = 1;
398   td->texture_type = LIMA_TEXTURE_TYPE_2D;
399   td->min_img_filter_nearest = 1;
400   td->mag_img_filter_nearest = 1;
401   td->wrap_s_clamp_to_edge = 1;
402   td->wrap_t_clamp_to_edge = 1;
403   td->unknown_2_2 = 0x1;
404
405   uint32_t *ta = cpu + lima_reload_tex_array_offset;
406   ta[0] = va + lima_reload_tex_desc_offset;
407
408   struct lima_job_fb_info *fb = &job->fb;
409   float reload_gl_pos[] = {
410      fb->width, 0,          0, 1,
411      0,         0,          0, 1,
412      0,         fb->height, 0, 1,
413   };
414   memcpy(cpu + lima_reload_gl_pos_offset, reload_gl_pos,
415          sizeof(reload_gl_pos));
416
417   float reload_varying[] = {
418      fb->width, 0,          0, 0,
419      0,         fb->height, 0, 0,
420   };
421   memcpy(cpu + lima_reload_varying_offset, reload_varying,
422          sizeof(reload_varying));
423
424   PLBU_CMD_BEGIN(&job->plbu_cmd_head, 20);
425
426   PLBU_CMD_VIEWPORT_LEFT(0);
427   PLBU_CMD_VIEWPORT_RIGHT(fui(fb->width));
428   PLBU_CMD_VIEWPORT_BOTTOM(0);
429   PLBU_CMD_VIEWPORT_TOP(fui(fb->height));
430
431   PLBU_CMD_RSW_VERTEX_ARRAY(
432      va + lima_reload_render_state_offset,
433      va + lima_reload_gl_pos_offset);
434
435   PLBU_CMD_UNKNOWN2();
436   PLBU_CMD_UNKNOWN1();
437
438   PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset);
439   PLBU_CMD_INDEXED_DEST(va + lima_reload_gl_pos_offset);
440   PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);
441
442   PLBU_CMD_END();
443
444   lima_dump_command_stream_print(job->dump, cpu, lima_reload_buffer_size,
445                                  false, "reload plbu cmd at va %x\n", va);
446}
447
448static void
449lima_pack_head_plbu_cmd(struct lima_job *job)
450{
451   struct lima_context *ctx = job->ctx;
452   struct lima_job_fb_info *fb = &job->fb;
453
454   PLBU_CMD_BEGIN(&job->plbu_cmd_head, 10);
455
456   PLBU_CMD_UNKNOWN2();
457   PLBU_CMD_BLOCK_STEP(fb->shift_min, fb->shift_h, fb->shift_w);
458   PLBU_CMD_TILED_DIMENSIONS(fb->tiled_w, fb->tiled_h);
459   PLBU_CMD_BLOCK_STRIDE(fb->block_w);
460
461   PLBU_CMD_ARRAY_ADDRESS(
462      ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size,
463      fb->block_w * fb->block_h);
464
465   PLBU_CMD_END();
466
467   if (lima_fb_cbuf_needs_reload(job))
468      lima_pack_reload_plbu_cmd(job, job->key.cbuf);
469
470   if (lima_fb_zsbuf_needs_reload(job))
471      lima_pack_reload_plbu_cmd(job, job->key.zsbuf);
472}
473
/*
 * Rotation step used by hilbert_coords(). Nothing happens unless ry is 0;
 * in that case the point is first mirrored inside an n x n square when
 * rx is 1, and then x and y are exchanged.
 */
static void
hilbert_rotate(int n, int *x, int *y, int rx, int ry)
{
   if (ry != 0)
      return;

   if (rx == 1) {
      *x = n - 1 - *x;
      *y = n - 1 - *y;
   }

   /* exchange the two coordinates */
   const int old_x = *x;
   *x = *y;
   *y = old_x;
}
489
/*
 * Convert the 1D index d into 2D coordinates (*x, *y), consuming two bits
 * of d per level for as long as (1 << level) is smaller than n.
 */
static void
hilbert_coords(int n, int d, int *x, int *y)
{
   int rest = d;

   *x = 0;
   *y = 0;

   for (int level = 0; (1 << level) < n; level++) {
      /* the two lowest bits of the remaining index select the quadrant */
      const int rx = (rest / 2) & 1;
      const int ry = (rest ^ rx) & 1;

      hilbert_rotate(1 << level, x, y, rx, ry);

      *x += rx << level;
      *y += ry << level;

      rest /= 4;
   }
}
510
/*
 * Compute the start offset of each PP's stream inside one buffer and return
 * the total size needed.
 *
 * Each stream start address is calculated with care for two things:
 * 1. overflow: the streams may differ in size because tiled_w * tiled_h
 *    need not be divisible by num_pp; the extra entries are added to the
 *    preceding streams
 * 2. alignment: each stream address must be 0x20 aligned
 *
 * off must have room for num_pp entries.
 */
static int
lima_get_pp_stream_size(int num_pp, int tiled_w, int tiled_h, uint32_t *off)
{
   const int num_tiles = tiled_w * tiled_h;
   /* 16 bytes per tile, plus 16 bytes on top for every stream */
   const int base_size = num_tiles / num_pp * 16 + 16;
   int leftover = num_tiles % num_pp;
   int cursor = 0;

   for (int pp = 0; pp < num_pp; pp++) {
      off[pp] = cursor;

      cursor += base_size;
      if (leftover) {
         /* this stream takes one of the leftover tiles */
         cursor += 16;
         leftover--;
      }
      cursor = align(cursor, 0x20);
   }

   return cursor;
}
537
538static void
539lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y,
540                        int tiled_w, int tiled_h)
541{
542   struct lima_context *ctx = job->ctx;
543   struct lima_pp_stream_state *ps = &ctx->pp_stream;
544   struct lima_job_fb_info *fb = &job->fb;
545   struct lima_screen *screen = lima_screen(ctx->base.screen);
546   int i, num_pp = screen->num_pp;
547
548   /* use hilbert_coords to generates 1D to 2D relationship.
549    * 1D for pp stream index and 2D for plb block x/y on framebuffer.
550    * if multi pp, interleave the 1D index to make each pp's render target
551    * close enough which should result close workload
552    */
553   int max = MAX2(tiled_w, tiled_h);
554   int index = 0;
555   uint32_t *stream[8];
556   int si[8] = {0};
557   int dim = 0;
558   int count = 0;
559
560   /* Don't update count if we get zero rect. We'll just generate
561    * PP stream with just terminators in it.
562    */
563   if ((tiled_w * tiled_h) != 0) {
564      dim = util_logbase2_ceil(max);
565      count = 1 << (dim + dim);
566   }
567
568   for (i = 0; i < num_pp; i++)
569      stream[i] = ps->map + ps->offset[i];
570
571   for (i = 0; i < count; i++) {
572      int x, y;
573      hilbert_coords(max, i, &x, &y);
574      if (x < tiled_w && y < tiled_h) {
575         x += off_x;
576         y += off_y;
577
578         int pp = index % num_pp;
579         int offset = ((y >> fb->shift_h) * fb->block_w +
580                       (x >> fb->shift_w)) * LIMA_CTX_PLB_BLK_SIZE;
581         int plb_va = ctx->plb[ctx->plb_index]->va + offset;
582
583         stream[pp][si[pp]++] = 0;
584         stream[pp][si[pp]++] = 0xB8000000 | x | (y << 8);
585         stream[pp][si[pp]++] = 0xE0000002 | ((plb_va >> 3) & ~0xE0000003);
586         stream[pp][si[pp]++] = 0xB0000000;
587
588         index++;
589      }
590   }
591
592   for (i = 0; i < num_pp; i++) {
593      stream[i][si[i]++] = 0;
594      stream[i][si[i]++] = 0xBC000000;
595      stream[i][si[i]++] = 0;
596      stream[i][si[i]++] = 0;
597
598      lima_dump_command_stream_print(
599         job->dump, stream[i], si[i] * 4,
600         false, "pp plb stream %d at va %x\n",
601         i, ps->va + ps->offset[i]);
602   }
603}
604
605static void
606lima_free_stale_pp_stream_bo(struct lima_context *ctx)
607{
608   list_for_each_entry_safe(struct lima_ctx_plb_pp_stream, entry,
609                            &ctx->plb_pp_stream_lru_list, lru_list) {
610      if (ctx->plb_stream_cache_size <= lima_plb_pp_stream_cache_size)
611         break;
612
613      struct hash_entry *hash_entry =
614         _mesa_hash_table_search(ctx->plb_pp_stream, &entry->key);
615      if (hash_entry)
616         _mesa_hash_table_remove(ctx->plb_pp_stream, hash_entry);
617      list_del(&entry->lru_list);
618
619      ctx->plb_stream_cache_size -= entry->bo->size;
620      lima_bo_unreference(entry->bo);
621
622      ralloc_free(entry);
623   }
624}
625
626static void
627lima_update_damage_pp_stream(struct lima_job *job)
628{
629   struct lima_context *ctx = job->ctx;
630   struct lima_damage_region *ds = lima_job_get_damage(job);
631   struct lima_job_fb_info *fb = &job->fb;
632   struct pipe_scissor_state bound;
633   struct pipe_scissor_state *dr = &job->damage_rect;
634
635   if (ds && ds->region) {
636      struct pipe_scissor_state *dbound = &ds->bound;
637      bound.minx = MAX2(dbound->minx, dr->minx >> 4);
638      bound.miny = MAX2(dbound->miny, dr->miny >> 4);
639      bound.maxx = MIN2(dbound->maxx, (dr->maxx + 0xf) >> 4);
640      bound.maxy = MIN2(dbound->maxy, (dr->maxy + 0xf) >> 4);
641   } else {
642      bound.minx = dr->minx >> 4;
643      bound.miny = dr->miny >> 4;
644      bound.maxx = (dr->maxx + 0xf) >> 4;
645      bound.maxy = (dr->maxy + 0xf) >> 4;
646   }
647
648   /* Clamp to FB size */
649   bound.minx = MIN2(bound.minx, fb->tiled_w);
650   bound.miny = MIN2(bound.miny, fb->tiled_h);
651   bound.maxx = MIN2(bound.maxx, fb->tiled_w);
652   bound.maxy = MIN2(bound.maxy, fb->tiled_h);
653
654   struct lima_ctx_plb_pp_stream_key key = {
655      .plb_index = ctx->plb_index,
656      .minx = bound.minx,
657      .miny = bound.miny,
658      .maxx = bound.maxx,
659      .maxy = bound.maxy,
660      .shift_w = fb->shift_w,
661      .shift_h = fb->shift_h,
662      .block_w = fb->block_w,
663      .block_h = fb->block_h,
664   };
665
666   struct hash_entry *entry =
667      _mesa_hash_table_search(ctx->plb_pp_stream, &key);
668   if (entry) {
669      struct lima_ctx_plb_pp_stream *s = entry->data;
670
671      list_del(&s->lru_list);
672      list_addtail(&s->lru_list, &ctx->plb_pp_stream_lru_list);
673
674      ctx->pp_stream.map = lima_bo_map(s->bo);
675      ctx->pp_stream.va = s->bo->va;
676      memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
677
678      lima_job_add_bo(job, LIMA_PIPE_PP, s->bo, LIMA_SUBMIT_BO_READ);
679
680      return;
681   }
682
683   lima_free_stale_pp_stream_bo(ctx);
684
685   struct lima_screen *screen = lima_screen(ctx->base.screen);
686   struct lima_ctx_plb_pp_stream *s =
687      rzalloc(ctx->plb_pp_stream, struct lima_ctx_plb_pp_stream);
688
689   list_inithead(&s->lru_list);
690   s->key.plb_index = ctx->plb_index;
691   s->key.minx = bound.minx;
692   s->key.maxx = bound.maxx;
693   s->key.miny = bound.miny;
694   s->key.maxy = bound.maxy;
695   s->key.shift_w = fb->shift_w;
696   s->key.shift_h = fb->shift_h;
697   s->key.block_w = fb->block_w;
698   s->key.block_h = fb->block_h;
699
700   int tiled_w = bound.maxx - bound.minx;
701   int tiled_h = bound.maxy - bound.miny;
702   int size = lima_get_pp_stream_size(
703      screen->num_pp, tiled_w, tiled_h, s->offset);
704
705   s->bo = lima_bo_create(screen, size, 0);
706
707   ctx->pp_stream.map = lima_bo_map(s->bo);
708   ctx->pp_stream.va = s->bo->va;
709   memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
710
711   lima_generate_pp_stream(job, bound.minx, bound.miny, tiled_w, tiled_h);
712
713   ctx->plb_stream_cache_size += size;
714   list_addtail(&s->lru_list, &ctx->plb_pp_stream_lru_list);
715   _mesa_hash_table_insert(ctx->plb_pp_stream, &s->key, s);
716
717   lima_job_add_bo(job, LIMA_PIPE_PP, s->bo, LIMA_SUBMIT_BO_READ);
718}
719
720static bool
721lima_damage_fullscreen(struct lima_job *job)
722{
723   struct pipe_scissor_state *dr = &job->damage_rect;
724
725   return dr->minx == 0 &&
726          dr->miny == 0 &&
727          dr->maxx == job->fb.width &&
728          dr->maxy == job->fb.height;
729}
730
731static void
732lima_update_pp_stream(struct lima_job *job)
733{
734   struct lima_context *ctx = job->ctx;
735   struct lima_screen *screen = lima_screen(ctx->base.screen);
736   struct lima_damage_region *damage = lima_job_get_damage(job);
737   if ((screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) ||
738       (damage && damage->region) || !lima_damage_fullscreen(job))
739      lima_update_damage_pp_stream(job);
740   else
741      /* Mali450 doesn't need full PP stream */
742      ctx->pp_stream.map = NULL;
743}
744
745static void
746lima_update_job_bo(struct lima_job *job)
747{
748   struct lima_context *ctx = job->ctx;
749
750   lima_job_add_bo(job, LIMA_PIPE_GP, ctx->plb_gp_stream,
751                      LIMA_SUBMIT_BO_READ);
752   lima_job_add_bo(job, LIMA_PIPE_GP, ctx->plb[ctx->plb_index],
753                      LIMA_SUBMIT_BO_WRITE);
754   lima_job_add_bo(job, LIMA_PIPE_GP, ctx->gp_tile_heap[ctx->plb_index],
755                      LIMA_SUBMIT_BO_WRITE);
756
757   lima_dump_command_stream_print(
758      job->dump, ctx->plb_gp_stream->map + ctx->plb_index * ctx->plb_gp_size,
759      ctx->plb_gp_size, false, "gp plb stream at va %x\n",
760      ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size);
761
762   lima_job_add_bo(job, LIMA_PIPE_PP, ctx->plb[ctx->plb_index],
763                      LIMA_SUBMIT_BO_READ);
764   lima_job_add_bo(job, LIMA_PIPE_PP, ctx->gp_tile_heap[ctx->plb_index],
765                      LIMA_SUBMIT_BO_READ);
766
767   struct lima_screen *screen = lima_screen(ctx->base.screen);
768   lima_job_add_bo(job, LIMA_PIPE_PP, screen->pp_buffer, LIMA_SUBMIT_BO_READ);
769}
770
771static void
772lima_finish_plbu_cmd(struct util_dynarray *plbu_cmd_array)
773{
774   int i = 0;
775   uint32_t *plbu_cmd = util_dynarray_ensure_cap(plbu_cmd_array, plbu_cmd_array->size + 2 * 4);
776
777   plbu_cmd[i++] = 0x00000000;
778   plbu_cmd[i++] = 0x50000000; /* END */
779
780   plbu_cmd_array->size += i * 4;
781}
782
783static void
784lima_pack_wb_zsbuf_reg(struct lima_job *job, uint32_t *wb_reg, int wb_idx)
785{
786   struct lima_job_fb_info *fb = &job->fb;
787   struct pipe_surface *zsbuf = job->key.zsbuf;
788   struct lima_resource *res = lima_resource(zsbuf->texture);
789   int level = zsbuf->u.tex.level;
790   uint32_t format = lima_format_get_pixel(zsbuf->format);
791
792   struct lima_pp_wb_reg *wb = (void *)wb_reg;
793   wb[wb_idx].type = 0x01; /* 1 for depth, stencil */
794   wb[wb_idx].address = res->bo->va + res->levels[level].offset;
795   wb[wb_idx].pixel_format = format;
796   if (res->tiled) {
797      wb[wb_idx].pixel_layout = 0x2;
798      wb[wb_idx].pitch = fb->tiled_w;
799   } else {
800      wb[wb_idx].pixel_layout = 0x0;
801      wb[wb_idx].pitch = res->levels[level].stride / 8;
802   }
803   wb[wb_idx].mrt_bits = 0;
804}
805
806static void
807lima_pack_wb_cbuf_reg(struct lima_job *job, uint32_t *frame_reg,
808                      uint32_t *wb_reg, int wb_idx)
809{
810   struct lima_job_fb_info *fb = &job->fb;
811   struct pipe_surface *cbuf = job->key.cbuf;
812   struct lima_resource *res = lima_resource(cbuf->texture);
813   int level = cbuf->u.tex.level;
814   unsigned layer = cbuf->u.tex.first_layer;
815   uint32_t format = lima_format_get_pixel(cbuf->format);
816   bool swap_channels = lima_format_get_pixel_swap_rb(cbuf->format);
817
818   struct lima_pp_frame_reg *frame = (void *)frame_reg;
819   frame->channel_layout = lima_format_get_channel_layout(cbuf->format);
820
821   struct lima_pp_wb_reg *wb = (void *)wb_reg;
822   wb[wb_idx].type = 0x02; /* 2 for color buffer */
823   wb[wb_idx].address = res->bo->va + res->levels[level].offset + layer * res->levels[level].layer_stride;
824   wb[wb_idx].pixel_format = format;
825   if (res->tiled) {
826      wb[wb_idx].pixel_layout = 0x2;
827      wb[wb_idx].pitch = fb->tiled_w;
828   } else {
829      wb[wb_idx].pixel_layout = 0x0;
830      wb[wb_idx].pitch = res->levels[level].stride / 8;
831   }
832   wb[wb_idx].mrt_bits = swap_channels ? 0x4 : 0x0;
833}
834
835static void
836lima_pack_pp_frame_reg(struct lima_job *job, uint32_t *frame_reg,
837                       uint32_t *wb_reg)
838{
839   struct lima_context *ctx = job->ctx;
840   struct lima_job_fb_info *fb = &job->fb;
841   struct pipe_surface *cbuf = job->key.cbuf;
842   struct lima_pp_frame_reg *frame = (void *)frame_reg;
843   struct lima_screen *screen = lima_screen(ctx->base.screen);
844   int wb_idx = 0;
845
846   frame->render_address = screen->pp_buffer->va + pp_frame_rsw_offset;
847   frame->flags = 0x02;
848   if (cbuf && util_format_is_float(cbuf->format)) {
849      frame->flags |= 0x01; /* enable fp16 */
850      frame->clear_value_color   = (uint32_t)(job->clear.color_16pc & 0xffffffffUL);
851      frame->clear_value_color_1 = (uint32_t)(job->clear.color_16pc >> 32);
852      frame->clear_value_color_2 = 0;
853      frame->clear_value_color_3 = 0;
854   }
855   else {
856      frame->clear_value_color   = job->clear.color_8pc;
857      frame->clear_value_color_1 = job->clear.color_8pc;
858      frame->clear_value_color_2 = job->clear.color_8pc;
859      frame->clear_value_color_3 = job->clear.color_8pc;
860   }
861
862   frame->clear_value_depth = job->clear.depth;
863   frame->clear_value_stencil = job->clear.stencil;
864   frame->one = 1;
865
866   frame->width = fb->width - 1;
867   frame->height = fb->height - 1;
868
869   /* frame->fragment_stack_address is overwritten per-pp in the kernel
870    * by the values of pp_frame.fragment_stack_address[i] */
871
872   /* These are "stack size" and "stack offset" shifted,
873    * here they are assumed to be always the same. */
874   frame->fragment_stack_size = job->pp_max_stack_size << 16 | job->pp_max_stack_size;
875
876   /* related with MSAA and different value when r4p0/r7p0 */
877   frame->supersampled_height = fb->height * 2 - 1;
878   frame->scale = 0xE0C;
879
880   frame->dubya = 0x77;
881   frame->onscreen = 1;
882   frame->blocking = (fb->shift_min << 28) | (fb->shift_h << 16) | fb->shift_w;
883
884   /* Set default layout to 8888 */
885   frame->channel_layout = 0x8888;
886
887   if (cbuf && (job->resolve & PIPE_CLEAR_COLOR0))
888      lima_pack_wb_cbuf_reg(job, frame_reg, wb_reg, wb_idx++);
889
890   if (job->key.zsbuf &&
891       (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)))
892      lima_pack_wb_zsbuf_reg(job, wb_reg, wb_idx++);
893}
894
895void
896lima_do_job(struct lima_job *job)
897{
898   #define pp_stack_pp_size 0x400
899
900   struct lima_context *ctx = job->ctx;
901
902   lima_pack_head_plbu_cmd(job);
903   lima_finish_plbu_cmd(&job->plbu_cmd_array);
904
905   lima_update_job_bo(job);
906
907   int vs_cmd_size = job->vs_cmd_array.size;
908   uint32_t vs_cmd_va = 0;
909
910   if (vs_cmd_size) {
911      void *vs_cmd = lima_job_create_stream_bo(
912         job, LIMA_PIPE_GP, vs_cmd_size, &vs_cmd_va);
913      memcpy(vs_cmd, util_dynarray_begin(&job->vs_cmd_array), vs_cmd_size);
914
915      lima_dump_command_stream_print(
916         job->dump, vs_cmd, vs_cmd_size, false, "flush vs cmd at va %x\n", vs_cmd_va);
917      lima_dump_vs_command_stream_print(job->dump, vs_cmd, vs_cmd_size, vs_cmd_va);
918   }
919
920   uint32_t plbu_cmd_va;
921   int plbu_cmd_size = job->plbu_cmd_array.size + job->plbu_cmd_head.size;
922   void *plbu_cmd = lima_job_create_stream_bo(
923      job, LIMA_PIPE_GP, plbu_cmd_size, &plbu_cmd_va);
924   memcpy(plbu_cmd,
925          util_dynarray_begin(&job->plbu_cmd_head),
926          job->plbu_cmd_head.size);
927   memcpy(plbu_cmd + job->plbu_cmd_head.size,
928          util_dynarray_begin(&job->plbu_cmd_array),
929          job->plbu_cmd_array.size);
930
931   lima_dump_command_stream_print(
932      job->dump, plbu_cmd, plbu_cmd_size, false, "flush plbu cmd at va %x\n", plbu_cmd_va);
933   lima_dump_plbu_command_stream_print(job->dump, plbu_cmd, plbu_cmd_size, plbu_cmd_va);
934
935   struct lima_screen *screen = lima_screen(ctx->base.screen);
936   struct drm_lima_gp_frame gp_frame;
937   struct lima_gp_frame_reg *gp_frame_reg = (void *)gp_frame.frame;
938   gp_frame_reg->vs_cmd_start = vs_cmd_va;
939   gp_frame_reg->vs_cmd_end = vs_cmd_va + vs_cmd_size;
940   gp_frame_reg->plbu_cmd_start = plbu_cmd_va;
941   gp_frame_reg->plbu_cmd_end = plbu_cmd_va + plbu_cmd_size;
942   gp_frame_reg->tile_heap_start = ctx->gp_tile_heap[ctx->plb_index]->va;
943   gp_frame_reg->tile_heap_end = ctx->gp_tile_heap[ctx->plb_index]->va + ctx->gp_tile_heap_size;
944
945   lima_dump_command_stream_print(
946      job->dump, &gp_frame, sizeof(gp_frame), false, "add gp frame\n");
947
948   if (!lima_job_start(job, LIMA_PIPE_GP, &gp_frame, sizeof(gp_frame)))
949      fprintf(stderr, "gp job error\n");
950
951   if (job->dump) {
952      if (lima_job_wait(job, LIMA_PIPE_GP, PIPE_TIMEOUT_INFINITE)) {
953         if (ctx->gp_output) {
954            float *pos = lima_bo_map(ctx->gp_output);
955            lima_dump_command_stream_print(
956               job->dump, pos, 4 * 4 * 16, true, "gl_pos dump at va %x\n",
957               ctx->gp_output->va);
958         }
959
960         uint32_t *plb = lima_bo_map(ctx->plb[ctx->plb_index]);
961         lima_dump_command_stream_print(
962            job->dump, plb, LIMA_CTX_PLB_BLK_SIZE, false, "plb dump at va %x\n",
963            ctx->plb[ctx->plb_index]->va);
964      }
965      else {
966         fprintf(stderr, "gp job wait error\n");
967         exit(1);
968      }
969   }
970
971   uint32_t pp_stack_va = 0;
972   if (job->pp_max_stack_size) {
973      lima_job_create_stream_bo(
974         job, LIMA_PIPE_PP,
975         screen->num_pp * job->pp_max_stack_size * pp_stack_pp_size,
976         &pp_stack_va);
977   }
978
979   lima_update_pp_stream(job);
980
981   struct lima_pp_stream_state *ps = &ctx->pp_stream;
982   if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) {
983      struct drm_lima_m400_pp_frame pp_frame = {0};
984      lima_pack_pp_frame_reg(job, pp_frame.frame, pp_frame.wb);
985      pp_frame.num_pp = screen->num_pp;
986
987      for (int i = 0; i < screen->num_pp; i++) {
988         pp_frame.plbu_array_address[i] = ps->va + ps->offset[i];
989         if (job->pp_max_stack_size)
990            pp_frame.fragment_stack_address[i] = pp_stack_va +
991               job->pp_max_stack_size * pp_stack_pp_size * i;
992      }
993
994      lima_dump_command_stream_print(
995         job->dump, &pp_frame, sizeof(pp_frame), false, "add pp frame\n");
996
997      if (!lima_job_start(job, LIMA_PIPE_PP, &pp_frame, sizeof(pp_frame)))
998         fprintf(stderr, "pp job error\n");
999   }
1000   else {
1001      struct drm_lima_m450_pp_frame pp_frame = {0};
1002      lima_pack_pp_frame_reg(job, pp_frame.frame, pp_frame.wb);
1003      pp_frame.num_pp = screen->num_pp;
1004
1005      if (job->pp_max_stack_size)
1006         for (int i = 0; i < screen->num_pp; i++)
1007            pp_frame.fragment_stack_address[i] = pp_stack_va +
1008               job->pp_max_stack_size * pp_stack_pp_size * i;
1009
1010      if (ps->map) {
1011         for (int i = 0; i < screen->num_pp; i++)
1012            pp_frame.plbu_array_address[i] = ps->va + ps->offset[i];
1013      }
1014      else {
1015         pp_frame.use_dlbu = true;
1016
1017         struct lima_job_fb_info *fb = &job->fb;
1018         pp_frame.dlbu_regs[0] = ctx->plb[ctx->plb_index]->va;
1019         pp_frame.dlbu_regs[1] = ((fb->tiled_h - 1) << 16) | (fb->tiled_w - 1);
1020         unsigned s = util_logbase2(LIMA_CTX_PLB_BLK_SIZE) - 7;
1021         pp_frame.dlbu_regs[2] = (s << 28) | (fb->shift_h << 16) | fb->shift_w;
1022         pp_frame.dlbu_regs[3] = ((fb->tiled_h - 1) << 24) | ((fb->tiled_w - 1) << 16);
1023      }
1024
1025      lima_dump_command_stream_print(
1026         job->dump, &pp_frame, sizeof(pp_frame), false, "add pp frame\n");
1027
1028      if (!lima_job_start(job, LIMA_PIPE_PP, &pp_frame, sizeof(pp_frame)))
1029         fprintf(stderr, "pp job error\n");
1030   }
1031
1032   if (job->dump) {
1033      if (!lima_job_wait(job, LIMA_PIPE_PP, PIPE_TIMEOUT_INFINITE)) {
1034         fprintf(stderr, "pp wait error\n");
1035         exit(1);
1036      }
1037   }
1038
1039   ctx->plb_index = (ctx->plb_index + 1) % lima_ctx_num_plb;
1040
1041   /* Set reload flags for next draw. It'll be unset if buffer is cleared */
1042   if (job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) {
1043      struct lima_surface *surf = lima_surface(job->key.cbuf);
1044      surf->reload = PIPE_CLEAR_COLOR0;
1045   }
1046
1047   if (job->key.zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
1048      struct lima_surface *surf = lima_surface(job->key.zsbuf);
1049      surf->reload = (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL));
1050   }
1051
1052   if (ctx->job == job)
1053      ctx->job = NULL;
1054
1055   lima_job_free(job);
1056}
1057
1058void
1059lima_flush(struct lima_context *ctx)
1060{
1061   hash_table_foreach(ctx->jobs, entry) {
1062      struct lima_job *job = entry->data;
1063      lima_do_job(job);
1064   }
1065}
1066
1067void
1068lima_flush_job_accessing_bo(
1069   struct lima_context *ctx, struct lima_bo *bo, bool write)
1070{
1071   hash_table_foreach(ctx->jobs, entry) {
1072      struct lima_job *job = entry->data;
1073      if (lima_job_has_bo(job, bo, write))
1074         lima_do_job(job);
1075   }
1076}
1077
1078/*
1079 * This is for current job flush previous job which write to the resource it wants
1080 * to read. Tipical usage is flush the FBO which is used as current task's texture.
1081 */
1082void
1083lima_flush_previous_job_writing_resource(
1084   struct lima_context *ctx, struct pipe_resource *prsc)
1085{
1086   struct hash_entry *entry = _mesa_hash_table_search(ctx->write_jobs, prsc);
1087
1088   if (entry) {
1089      struct lima_job *job = entry->data;
1090
1091      /* do not flush current job */
1092      if (job != ctx->job)
1093         lima_do_job(job);
1094   }
1095}
1096
1097static void
1098lima_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
1099                unsigned flags)
1100{
1101   struct lima_context *ctx = lima_context(pctx);
1102
1103   lima_flush(ctx);
1104
1105   if (fence) {
1106      int drm_fd = lima_screen(ctx->base.screen)->fd;
1107      int fd;
1108
1109      if (!drmSyncobjExportSyncFile(drm_fd, ctx->out_sync[LIMA_PIPE_PP], &fd))
1110         *fence = lima_fence_create(fd);
1111   }
1112}
1113
1114static bool
1115lima_job_compare(const void *s1, const void *s2)
1116{
1117   return memcmp(s1, s2, sizeof(struct lima_job_key)) == 0;
1118}
1119
1120static uint32_t
1121lima_job_hash(const void *key)
1122{
1123   return _mesa_hash_data(key, sizeof(struct lima_job_key));
1124}
1125
1126bool lima_job_init(struct lima_context *ctx)
1127{
1128   int fd = lima_screen(ctx->base.screen)->fd;
1129
1130   ctx->jobs = _mesa_hash_table_create(ctx, lima_job_hash, lima_job_compare);
1131   if (!ctx->jobs)
1132      return false;
1133
1134   ctx->write_jobs = _mesa_hash_table_create(
1135      ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
1136   if (!ctx->write_jobs)
1137      return false;
1138
1139   ctx->in_sync_fd = -1;
1140
1141   for (int i = 0; i < 2; i++) {
1142      if (drmSyncobjCreate(fd, DRM_SYNCOBJ_CREATE_SIGNALED, ctx->in_sync + i) ||
1143          drmSyncobjCreate(fd, DRM_SYNCOBJ_CREATE_SIGNALED, ctx->out_sync + i))
1144         return false;
1145   }
1146
1147   ctx->base.flush = lima_pipe_flush;
1148
1149   return true;
1150}
1151
1152void lima_job_fini(struct lima_context *ctx)
1153{
1154   int fd = lima_screen(ctx->base.screen)->fd;
1155
1156   lima_flush(ctx);
1157
1158   for (int i = 0; i < 2; i++) {
1159      if (ctx->in_sync[i])
1160         drmSyncobjDestroy(fd, ctx->in_sync[i]);
1161      if (ctx->out_sync[i])
1162         drmSyncobjDestroy(fd, ctx->out_sync[i]);
1163   }
1164
1165   if (ctx->in_sync_fd >= 0)
1166      close(ctx->in_sync_fd);
1167}
1168