1
2#include "util/format/u_format.h"
3
4#include "nv50/nv50_context.h"
5
6#include "nv50/g80_defs.xml.h"
7
8struct nv50_transfer {
9   struct pipe_transfer base;
10   struct nv50_m2mf_rect rect[2];
11   uint32_t nblocksx;
12   uint32_t nblocksy;
13};
14
15void
16nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect,
17                     struct pipe_resource *restrict res, unsigned l,
18                     unsigned x, unsigned y, unsigned z)
19{
20   struct nv50_miptree *mt = nv50_miptree(res);
21   const unsigned w = u_minify(res->width0, l);
22   const unsigned h = u_minify(res->height0, l);
23
24   rect->bo = mt->base.bo;
25   rect->domain = mt->base.domain;
26   rect->base = mt->level[l].offset;
27   if (mt->base.bo->offset != mt->base.address)
28      rect->base += mt->base.address - mt->base.bo->offset;
29   rect->pitch = mt->level[l].pitch;
30   if (util_format_is_plain(res->format)) {
31      rect->width = w << mt->ms_x;
32      rect->height = h << mt->ms_y;
33      rect->x = x << mt->ms_x;
34      rect->y = y << mt->ms_y;
35   } else {
36      rect->width = util_format_get_nblocksx(res->format, w);
37      rect->height = util_format_get_nblocksy(res->format, h);
38      rect->x = util_format_get_nblocksx(res->format, x);
39      rect->y = util_format_get_nblocksy(res->format, y);
40   }
41   rect->tile_mode = mt->level[l].tile_mode;
42   rect->cpp = util_format_get_blocksize(res->format);
43
44   if (mt->layout_3d) {
45      rect->z = z;
46      rect->depth = u_minify(res->depth0, l);
47   } else {
48      rect->base += z * mt->layer_stride;
49      rect->z = 0;
50      rect->depth = 1;
51   }
52}
53
54/* This is very similar to nv50_2d_texture_do_copy, but doesn't require
55 * miptree objects. Maybe refactor? Although it's not straightforward.
56 */
57static void
58nv50_2d_transfer_rect(struct nv50_context *nv50,
59                      const struct nv50_m2mf_rect *dst,
60                      const struct nv50_m2mf_rect *src,
61                      uint32_t nblocksx, uint32_t nblocksy)
62{
63   struct nouveau_pushbuf *push = nv50->base.pushbuf;
64   struct nouveau_bufctx *bctx = nv50->bufctx;
65   const int cpp = dst->cpp;
66
67   nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);
68   nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);
69   nouveau_pushbuf_bufctx(push, bctx);
70   nouveau_pushbuf_validate(push);
71
72   uint32_t format;
73   switch (cpp) {
74   case 1:
75      format = G80_SURFACE_FORMAT_R8_UNORM;
76      break;
77   case 2:
78      format = G80_SURFACE_FORMAT_R16_UNORM;
79      break;
80   case 4:
81      format = G80_SURFACE_FORMAT_BGRA8_UNORM;
82      break;
83   case 8:
84      format = G80_SURFACE_FORMAT_RGBA16_FLOAT;
85      break;
86   case 16:
87      format = G80_SURFACE_FORMAT_RGBA32_FLOAT;
88      break;
89   default:
90      assert(!"Unexpected cpp");
91      format = G80_SURFACE_FORMAT_R8_UNORM;
92   }
93
94   if (nouveau_bo_memtype(src->bo)) {
95      BEGIN_NV04(push, NV50_2D(SRC_FORMAT), 5);
96      PUSH_DATA (push, format);
97      PUSH_DATA (push, 0);
98      PUSH_DATA (push, src->tile_mode);
99      PUSH_DATA (push, src->depth);
100      PUSH_DATA (push, src->z);
101      BEGIN_NV04(push, NV50_2D(SRC_WIDTH), 4);
102      PUSH_DATA (push, src->width);
103      PUSH_DATA (push, src->height);
104      PUSH_DATAh(push, src->bo->offset + src->base);
105      PUSH_DATA (push, src->bo->offset + src->base);
106   } else {
107      BEGIN_NV04(push, NV50_2D(SRC_FORMAT), 2);
108      PUSH_DATA (push, format);
109      PUSH_DATA (push, 1);
110      BEGIN_NV04(push, NV50_2D(SRC_PITCH), 5);
111      PUSH_DATA (push, src->pitch);
112      PUSH_DATA (push, src->width);
113      PUSH_DATA (push, src->height);
114      PUSH_DATAh(push, src->bo->offset + src->base);
115      PUSH_DATA (push, src->bo->offset + src->base);
116   }
117
118   if (nouveau_bo_memtype(dst->bo)) {
119      BEGIN_NV04(push, NV50_2D(DST_FORMAT), 5);
120      PUSH_DATA (push, format);
121      PUSH_DATA (push, 0);
122      PUSH_DATA (push, dst->tile_mode);
123      PUSH_DATA (push, dst->depth);
124      PUSH_DATA (push, dst->z);
125      BEGIN_NV04(push, NV50_2D(DST_WIDTH), 4);
126      PUSH_DATA (push, dst->width);
127      PUSH_DATA (push, dst->height);
128      PUSH_DATAh(push, dst->bo->offset + dst->base);
129      PUSH_DATA (push, dst->bo->offset + dst->base);
130   } else {
131      BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
132      PUSH_DATA (push, format);
133      PUSH_DATA (push, 1);
134      BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
135      PUSH_DATA (push, dst->pitch);
136      PUSH_DATA (push, dst->width);
137      PUSH_DATA (push, dst->height);
138      PUSH_DATAh(push, dst->bo->offset + dst->base);
139      PUSH_DATA (push, dst->bo->offset + dst->base);
140   }
141
142   BEGIN_NV04(push, NV50_2D(BLIT_CONTROL), 1);
143   PUSH_DATA (push, NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE);
144   BEGIN_NV04(push, NV50_2D(BLIT_DST_X), 4);
145   PUSH_DATA (push, dst->x);
146   PUSH_DATA (push, dst->y);
147   PUSH_DATA (push, nblocksx);
148   PUSH_DATA (push, nblocksy);
149   BEGIN_NV04(push, NV50_2D(BLIT_DU_DX_FRACT), 4);
150   PUSH_DATA (push, 0);
151   PUSH_DATA (push, 1);
152   PUSH_DATA (push, 0);
153   PUSH_DATA (push, 1);
154   BEGIN_NV04(push, NV50_2D(BLIT_SRC_X_FRACT), 4);
155   PUSH_DATA (push, 0);
156   PUSH_DATA (push, src->x);
157   PUSH_DATA (push, 0);
158   PUSH_DATA (push, src->y);
159
160   nouveau_bufctx_reset(bctx, 0);
161}
162
163void
164nv50_m2mf_transfer_rect(struct nv50_context *nv50,
165                        const struct nv50_m2mf_rect *dst,
166                        const struct nv50_m2mf_rect *src,
167                        uint32_t nblocksx, uint32_t nblocksy)
168{
169   struct nouveau_pushbuf *push = nv50->base.pushbuf;
170   struct nouveau_bufctx *bctx = nv50->bufctx;
171   const int cpp = dst->cpp;
172   uint32_t src_ofst = src->base;
173   uint32_t dst_ofst = dst->base;
174   uint32_t height = nblocksy;
175   uint32_t sy = src->y;
176   uint32_t dy = dst->y;
177
178   assert(dst->cpp == src->cpp);
179
180   /* Workaround: M2MF appears to break at the 64k boundary for tiled
181    * textures, which can really only happen with RGBA32 formats.
182    */
183   bool eng2d = false;
184   if (nouveau_bo_memtype(src->bo)) {
185      if (src->width * cpp > 65536)
186         eng2d = true;
187   }
188   if (nouveau_bo_memtype(dst->bo)) {
189      if (dst->width * cpp > 65536)
190         eng2d = true;
191   }
192   if (eng2d) {
193      nv50_2d_transfer_rect(nv50, dst, src, nblocksx, nblocksy);
194      return;
195   }
196
197   nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);
198   nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);
199   nouveau_pushbuf_bufctx(push, bctx);
200   nouveau_pushbuf_validate(push);
201
202   if (nouveau_bo_memtype(src->bo)) {
203      BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 6);
204      PUSH_DATA (push, 0);
205      PUSH_DATA (push, src->tile_mode);
206      PUSH_DATA (push, src->width * cpp);
207      PUSH_DATA (push, src->height);
208      PUSH_DATA (push, src->depth);
209      PUSH_DATA (push, src->z);
210   } else {
211      src_ofst += src->y * src->pitch + src->x * cpp;
212
213      BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1);
214      PUSH_DATA (push, 1);
215      BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_IN), 1);
216      PUSH_DATA (push, src->pitch);
217   }
218
219   if (nouveau_bo_memtype(dst->bo)) {
220      BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 6);
221      PUSH_DATA (push, 0);
222      PUSH_DATA (push, dst->tile_mode);
223      PUSH_DATA (push, dst->width * cpp);
224      PUSH_DATA (push, dst->height);
225      PUSH_DATA (push, dst->depth);
226      PUSH_DATA (push, dst->z);
227   } else {
228      dst_ofst += dst->y * dst->pitch + dst->x * cpp;
229
230      BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1);
231      PUSH_DATA (push, 1);
232      BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_OUT), 1);
233      PUSH_DATA (push, dst->pitch);
234   }
235
236   while (height) {
237      int line_count = height > 2047 ? 2047 : height;
238
239      BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2);
240      PUSH_DATAh(push, src->bo->offset + src_ofst);
241      PUSH_DATAh(push, dst->bo->offset + dst_ofst);
242
243      BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2);
244      PUSH_DATA (push, src->bo->offset + src_ofst);
245      PUSH_DATA (push, dst->bo->offset + dst_ofst);
246
247      if (nouveau_bo_memtype(src->bo)) {
248         BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_IN), 1);
249         PUSH_DATA (push, (sy << 16) | (src->x * cpp));
250      } else {
251         src_ofst += line_count * src->pitch;
252      }
253      if (nouveau_bo_memtype(dst->bo)) {
254         BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_OUT), 1);
255         PUSH_DATA (push, (dy << 16) | (dst->x * cpp));
256      } else {
257         dst_ofst += line_count * dst->pitch;
258      }
259
260      BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4);
261      PUSH_DATA (push, nblocksx * cpp);
262      PUSH_DATA (push, line_count);
263      PUSH_DATA (push, (1 << 8) | (1 << 0));
264      PUSH_DATA (push, 0);
265
266      height -= line_count;
267      sy += line_count;
268      dy += line_count;
269   }
270
271   nouveau_bufctx_reset(bctx, 0);
272}
273
274void
275nv50_sifc_linear_u8(struct nouveau_context *nv,
276                    struct nouveau_bo *dst, unsigned offset, unsigned domain,
277                    unsigned size, const void *data)
278{
279   struct nv50_context *nv50 = nv50_context(&nv->pipe);
280   struct nouveau_pushbuf *push = nv50->base.pushbuf;
281   uint32_t *src = (uint32_t *)data;
282   unsigned count = (size + 3) / 4;
283   unsigned xcoord = offset & 0xff;
284
285   nouveau_bufctx_refn(nv50->bufctx, 0, dst, domain | NOUVEAU_BO_WR);
286   nouveau_pushbuf_bufctx(push, nv50->bufctx);
287   nouveau_pushbuf_validate(push);
288
289   offset &= ~0xff;
290
291   BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
292   PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
293   PUSH_DATA (push, 1);
294   BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
295   PUSH_DATA (push, 262144);
296   PUSH_DATA (push, 65536);
297   PUSH_DATA (push, 1);
298   PUSH_DATAh(push, dst->offset + offset);
299   PUSH_DATA (push, dst->offset + offset);
300   BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
301   PUSH_DATA (push, 0);
302   PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
303   BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
304   PUSH_DATA (push, size);
305   PUSH_DATA (push, 1);
306   PUSH_DATA (push, 0);
307   PUSH_DATA (push, 1);
308   PUSH_DATA (push, 0);
309   PUSH_DATA (push, 1);
310   PUSH_DATA (push, 0);
311   PUSH_DATA (push, xcoord);
312   PUSH_DATA (push, 0);
313   PUSH_DATA (push, 0);
314
315   while (count) {
316      unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
317
318      BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
319      PUSH_DATAp(push, src, nr);
320
321      src += nr;
322      count -= nr;
323   }
324
325   nouveau_bufctx_reset(nv50->bufctx, 0);
326}
327
328void
329nv50_m2mf_copy_linear(struct nouveau_context *nv,
330                      struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
331                      struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
332                      unsigned size)
333{
334   struct nouveau_pushbuf *push = nv->pushbuf;
335   struct nouveau_bufctx *bctx = nv50_context(&nv->pipe)->bufctx;
336
337   nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD);
338   nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR);
339   nouveau_pushbuf_bufctx(push, bctx);
340   nouveau_pushbuf_validate(push);
341
342   BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1);
343   PUSH_DATA (push, 1);
344   BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1);
345   PUSH_DATA (push, 1);
346
347   while (size) {
348      unsigned bytes = MIN2(size, 1 << 17);
349
350      BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2);
351      PUSH_DATAh(push, src->offset + srcoff);
352      PUSH_DATAh(push, dst->offset + dstoff);
353      BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2);
354      PUSH_DATA (push, src->offset + srcoff);
355      PUSH_DATA (push, dst->offset + dstoff);
356      BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4);
357      PUSH_DATA (push, bytes);
358      PUSH_DATA (push, 1);
359      PUSH_DATA (push, (1 << 8) | (1 << 0));
360      PUSH_DATA (push, 0);
361
362      srcoff += bytes;
363      dstoff += bytes;
364      size -= bytes;
365   }
366
367   nouveau_bufctx_reset(bctx, 0);
368}
369
370void *
371nv50_miptree_transfer_map(struct pipe_context *pctx,
372                          struct pipe_resource *res,
373                          unsigned level,
374                          unsigned usage,
375                          const struct pipe_box *box,
376                          struct pipe_transfer **ptransfer)
377{
378   struct nv50_screen *screen = nv50_screen(pctx->screen);
379   struct nv50_context *nv50 = nv50_context(pctx);
380   struct nouveau_device *dev = nv50->screen->base.device;
381   const struct nv50_miptree *mt = nv50_miptree(res);
382   struct nv50_transfer *tx;
383   uint32_t size;
384   int ret;
385   unsigned flags = 0;
386
387   if (usage & PIPE_MAP_DIRECTLY)
388      return NULL;
389
390   tx = CALLOC_STRUCT(nv50_transfer);
391   if (!tx)
392      return NULL;
393
394   pipe_resource_reference(&tx->base.resource, res);
395
396   tx->base.level = level;
397   tx->base.usage = usage;
398   tx->base.box = *box;
399
400   if (util_format_is_plain(res->format)) {
401      tx->nblocksx = box->width << mt->ms_x;
402      tx->nblocksy = box->height << mt->ms_y;
403   } else {
404      tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
405      tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
406   }
407
408   tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
409   tx->base.layer_stride = tx->nblocksy * tx->base.stride;
410
411   nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, box->y, box->z);
412
413   size = tx->base.layer_stride;
414
415   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
416                        size * tx->base.box.depth, NULL, &tx->rect[1].bo);
417   if (ret) {
418      FREE(tx);
419      return NULL;
420   }
421
422   tx->rect[1].cpp = tx->rect[0].cpp;
423   tx->rect[1].width = tx->nblocksx;
424   tx->rect[1].height = tx->nblocksy;
425   tx->rect[1].depth = 1;
426   tx->rect[1].pitch = tx->base.stride;
427   tx->rect[1].domain = NOUVEAU_BO_GART;
428
429   if (usage & PIPE_MAP_READ) {
430      unsigned base = tx->rect[0].base;
431      unsigned z = tx->rect[0].z;
432      unsigned i;
433      for (i = 0; i < box->depth; ++i) {
434         nv50_m2mf_transfer_rect(nv50, &tx->rect[1], &tx->rect[0],
435                                 tx->nblocksx, tx->nblocksy);
436         if (mt->layout_3d)
437            tx->rect[0].z++;
438         else
439            tx->rect[0].base += mt->layer_stride;
440         tx->rect[1].base += size;
441      }
442      tx->rect[0].z = z;
443      tx->rect[0].base = base;
444      tx->rect[1].base = 0;
445   }
446
447   if (tx->rect[1].bo->map) {
448      *ptransfer = &tx->base;
449      return tx->rect[1].bo->map;
450   }
451
452   if (usage & PIPE_MAP_READ)
453      flags = NOUVEAU_BO_RD;
454   if (usage & PIPE_MAP_WRITE)
455      flags |= NOUVEAU_BO_WR;
456
457   ret = nouveau_bo_map(tx->rect[1].bo, flags, screen->base.client);
458   if (ret) {
459      nouveau_bo_ref(NULL, &tx->rect[1].bo);
460      FREE(tx);
461      return NULL;
462   }
463
464   *ptransfer = &tx->base;
465   return tx->rect[1].bo->map;
466}
467
468void
469nv50_miptree_transfer_unmap(struct pipe_context *pctx,
470                            struct pipe_transfer *transfer)
471{
472   struct nv50_context *nv50 = nv50_context(pctx);
473   struct nv50_transfer *tx = (struct nv50_transfer *)transfer;
474   struct nv50_miptree *mt = nv50_miptree(tx->base.resource);
475   unsigned i;
476
477   if (tx->base.usage & PIPE_MAP_WRITE) {
478      for (i = 0; i < tx->base.box.depth; ++i) {
479         nv50_m2mf_transfer_rect(nv50, &tx->rect[0], &tx->rect[1],
480                                 tx->nblocksx, tx->nblocksy);
481         if (mt->layout_3d)
482            tx->rect[0].z++;
483         else
484            tx->rect[0].base += mt->layer_stride;
485         tx->rect[1].base += tx->nblocksy * tx->base.stride;
486      }
487
488      /* Allow the copies above to finish executing before freeing the source */
489      nouveau_fence_work(nv50->screen->base.fence.current,
490                         nouveau_fence_unref_bo, tx->rect[1].bo);
491   } else {
492      nouveau_bo_ref(NULL, &tx->rect[1].bo);
493   }
494
495   pipe_resource_reference(&transfer->resource, NULL);
496
497   FREE(tx);
498}
499
500static void
501nv50_cb_bo_push(struct nouveau_context *nv,
502                struct nouveau_bo *bo, unsigned domain,
503                unsigned bufid,
504                unsigned offset, unsigned words,
505                const uint32_t *data)
506{
507   struct nouveau_pushbuf *push = nv->pushbuf;
508
509   assert(!(offset & 3));
510
511   while (words) {
512      unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
513
514      PUSH_SPACE(push, nr + 3);
515      PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);
516      BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
517      PUSH_DATA (push, (offset << 6) | bufid);
518      BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
519      PUSH_DATAp(push, data, nr);
520
521      words -= nr;
522      data += nr;
523      offset += nr * 4;
524   }
525}
526
527void
528nv50_cb_push(struct nouveau_context *nv,
529             struct nv04_resource *res,
530             unsigned offset, unsigned words, const uint32_t *data)
531{
532   struct nv50_context *nv50 = nv50_context(&nv->pipe);
533   struct nv50_constbuf *cb = NULL;
534   int s, bufid;
535   /* Go through all the constbuf binding points of this buffer and try to
536    * find one which contains the region to be updated.
537    */
538   for (s = 0; s < NV50_MAX_SHADER_STAGES && !cb; s++) {
539      uint16_t bindings = res->cb_bindings[s];
540      while (bindings) {
541         int i = ffs(bindings) - 1;
542         uint32_t cb_offset = nv50->constbuf[s][i].offset;
543
544         bindings &= ~(1 << i);
545         if (cb_offset <= offset &&
546             cb_offset + nv50->constbuf[s][i].size >= offset + words * 4) {
547            cb = &nv50->constbuf[s][i];
548            bufid = s * 16 + i;
549            break;
550         }
551      }
552   }
553
554   if (cb) {
555      nv50_cb_bo_push(nv, res->bo, res->domain,
556                      bufid, offset - cb->offset, words, data);
557   } else {
558      nv->push_data(nv, res->bo, res->offset + offset, res->domain,
559                    words * 4, data);
560   }
561}
562