1/*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "nv50/nv50_context.h"
24#include "nv50/nv50_resource.h"
25#include "nv50/g80_texture.xml.h"
26#include "nv50/g80_defs.xml.h"
27
28#include "util/format/u_format.h"
29
30static inline uint32_t
31nv50_tic_swizzle(const struct nv50_format *fmt, unsigned swz, bool tex_int)
32{
33   switch (swz) {
34   case PIPE_SWIZZLE_X  : return fmt->tic.src_x;
35   case PIPE_SWIZZLE_Y: return fmt->tic.src_y;
36   case PIPE_SWIZZLE_Z : return fmt->tic.src_z;
37   case PIPE_SWIZZLE_W: return fmt->tic.src_w;
38   case PIPE_SWIZZLE_1:
39      return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
40   case PIPE_SWIZZLE_0:
41   default:
42      return G80_TIC_SOURCE_ZERO;
43   }
44}
45
46struct pipe_sampler_view *
47nv50_create_sampler_view(struct pipe_context *pipe,
48                         struct pipe_resource *res,
49                         const struct pipe_sampler_view *templ)
50{
51   uint32_t flags = 0;
52
53   if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)
54      flags |= NV50_TEXVIEW_SCALED_COORDS;
55
56   return nv50_create_texture_view(pipe, res, templ, flags);
57}
58
/* Build a TIC (texture image control) entry describing @texture as seen
 * through @templ and wrap it in an nv50_tic_entry sampler view.  The 8
 * TIC dwords are computed here but only uploaded to the TIC area on
 * first validation (view->id starts out as -1; see nv50_validate_tic).
 * Returns NULL on allocation failure.
 */
struct pipe_sampler_view *
nv50_create_texture_view(struct pipe_context *pipe,
                         struct pipe_resource *texture,
                         const struct pipe_sampler_view *templ,
                         uint32_t flags)
{
   const uint32_t class_3d = nouveau_context(pipe)->screen->class_3d;
   const struct util_format_description *desc;
   const struct nv50_format *fmt;
   uint64_t addr;
   uint32_t *tic;
   uint32_t swz[4];
   uint32_t depth;
   struct nv50_tic_entry *view;
   struct nv50_miptree *mt = nv50_miptree(texture);
   bool tex_int;

   view = MALLOC_STRUCT(nv50_tic_entry);
   if (!view)
      return NULL;

   /* Copy the template and turn the copy into a live sampler view. */
   view->pipe = *templ;
   view->pipe.reference.count = 1;
   view->pipe.texture = NULL;
   view->pipe.context = pipe;

   view->id = -1; /* no TIC slot allocated yet */

   pipe_resource_reference(&view->pipe.texture, texture);

   tic = &view->tic[0];

   desc = util_format_description(view->pipe.format);

   /* TIC[0]: format encoding, per-component data types, swizzle sources */

   fmt = &nv50_format_table[view->pipe.format];

   tex_int = util_format_is_pure_integer(view->pipe.format);

   swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
   swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
   swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
   swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
   tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
            (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
            (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
            (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
            (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
            (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
            (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
            (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
            (swz[3] << G80_TIC_0_W_SOURCE__SHIFT);

   addr = mt->base.address;

   /* Depth is either the 3D depth or the layer count, whichever is used. */
   depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);

   if (mt->base.base.array_size > 1) {
      /* there doesn't seem to be a base layer field in TIC */
      addr += view->pipe.u.tex.first_layer * mt->layer_stride;
      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
   }

   tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      tic[2] |= G80_TIC_2_SRGB_CONVERSION;

   if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
      tic[2] |= G80_TIC_2_NORMALIZED_COORDS;

   /* Linear (non-tiled) storage: use the simplified pitch layout with no
    * mipmaps; buffers additionally apply the view's byte offset. */
   if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
      if (templ->target == PIPE_BUFFER) {
         addr += view->pipe.u.buf.offset;
         tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
         tic[3] = 0;
         tic[4] = /* width */
            view->pipe.u.buf.size / (desc->block.bits / 8);
         tic[5] = 0;
      } else {
         tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
         tic[3] = mt->level[0].pitch;
         tic[4] = mt->base.base.width0;
         tic[5] = (1 << 16) | (mt->base.base.height0);
      }
      tic[6] =
      tic[7] = 0;
      /* Address low 32 bits in tic[1], high bits merged into tic[2]. */
      tic[1] = addr;
      tic[2] |= addr >> 32;
      return &view->pipe;
   }

   /* Tiled storage path. */
   tic[1] = addr;
   tic[2] |= (addr >> 32) & 0xff;

   /* Encode level-0 tile dimensions from the miptree's tile_mode. */
   tic[2] |=
      ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
      ((mt->level[0].tile_mode & 0xf00) << (25 - 8));

   switch (templ->target) {
   case PIPE_TEXTURE_1D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
      break;
   case PIPE_TEXTURE_2D:
      if (mt->ms_x)
         tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
      else
         tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_RECT:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
      break;
   case PIPE_TEXTURE_3D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
      break;
   case PIPE_TEXTURE_CUBE:
      /* gallium counts faces as layers; the TIC wants whole cubes */
      depth /= 6;
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
      break;
   case PIPE_TEXTURE_1D_ARRAY:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
      break;
   case PIPE_TEXTURE_CUBE_ARRAY:
      depth /= 6;
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
      break;
   case PIPE_BUFFER:
      assert(0); /* should be linear and handled above ! */
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER | G80_TIC_2_LAYOUT_PITCH;
      break;
   default:
      unreachable("unexpected/invalid texture target");
   }

   tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;

   /* Width/height are stored pre-scaled by the MS sample shifts. */
   tic[4] = (1 << 31) | (mt->base.base.width0 << mt->ms_x);

   tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff;
   tic[5] |= depth << 16;
   /* NOTE(review): newer classes apparently take the resource's full mip
    * chain here, older ones the view's last level — confirm against the
    * class documentation. */
   if (class_3d > NV50_3D_CLASS)
      tic[5] |= mt->base.base.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT;
   else
      tic[5] |= view->pipe.u.tex.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT;

   tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */

   /* tic[7] holds the view's mip-level window on newer classes only. */
   if (class_3d > NV50_3D_CLASS)
      tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
   else
      tic[7] = 0;

   /* Unnormalized coordinates cannot use mipmapping: clamp the map
    * mip-level field back to zero for mipmapped resources. */
   if (unlikely(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS)))
      if (mt->base.base.last_level)
         tic[5] &= ~G80_TIC_5_MAP_MIP_LEVEL__MASK;

   return &view->pipe;
}
221
222static void
223nv50_update_tic(struct nv50_context *nv50, struct nv50_tic_entry *tic,
224                struct nv04_resource *res)
225{
226   uint64_t address = res->address;
227   if (res->base.target != PIPE_BUFFER)
228      return;
229   address += tic->pipe.u.buf.offset;
230   if (tic->tic[1] == (uint32_t)address &&
231       (tic->tic[2] & 0xff) == address >> 32)
232      return;
233
234   nv50_screen_tic_unlock(nv50->screen, tic);
235   tic->id = -1;
236   tic->tic[1] = address;
237   tic->tic[2] &= 0xffffff00;
238   tic->tic[2] |= address >> 32;
239}
240
241bool
242nv50_validate_tic(struct nv50_context *nv50, int s)
243{
244   struct nouveau_pushbuf *push = nv50->base.pushbuf;
245   struct nouveau_bo *txc = nv50->screen->txc;
246   unsigned i;
247   bool need_flush = false;
248   const bool is_compute_stage = s == NV50_SHADER_STAGE_COMPUTE;
249
250   assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
251   for (i = 0; i < nv50->num_textures[s]; ++i) {
252      struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
253      struct nv04_resource *res;
254
255      if (!tic) {
256         if (unlikely(is_compute_stage))
257            BEGIN_NV04(push, NV50_CP(BIND_TIC), 1);
258         else
259            BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
260         PUSH_DATA (push, (i << 1) | 0);
261         continue;
262      }
263      res = &nv50_miptree(tic->pipe.texture)->base;
264      nv50_update_tic(nv50, tic, res);
265
266      if (tic->id < 0) {
267         tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
268
269         BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
270         PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
271         PUSH_DATA (push, 1);
272         BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
273         PUSH_DATA (push, 262144);
274         PUSH_DATA (push, 65536);
275         PUSH_DATA (push, 1);
276         PUSH_DATAh(push, txc->offset);
277         PUSH_DATA (push, txc->offset);
278         BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
279         PUSH_DATA (push, 0);
280         PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
281         BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
282         PUSH_DATA (push, 32);
283         PUSH_DATA (push, 1);
284         PUSH_DATA (push, 0);
285         PUSH_DATA (push, 1);
286         PUSH_DATA (push, 0);
287         PUSH_DATA (push, 1);
288         PUSH_DATA (push, 0);
289         PUSH_DATA (push, tic->id * 32);
290         PUSH_DATA (push, 0);
291         PUSH_DATA (push, 0);
292         BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8);
293         PUSH_DATAp(push, &tic->tic[0], 8);
294
295         need_flush = true;
296      } else
297      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
298         if (unlikely(is_compute_stage))
299            BEGIN_NV04(push, NV50_CP(TEX_CACHE_CTL), 1);
300         else
301            BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
302         PUSH_DATA (push, 0x20);
303      }
304
305      nv50->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
306
307      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
308      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
309
310      if (unlikely(is_compute_stage)) {
311         BCTX_REFN(nv50->bufctx_cp, CP_TEXTURES, res, RD);
312         BEGIN_NV04(push, NV50_CP(BIND_TIC), 1);
313      } else {
314         BCTX_REFN(nv50->bufctx_3d, 3D_TEXTURES, res, RD);
315         BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
316      }
317      PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1);
318   }
319   for (; i < nv50->state.num_textures[s]; ++i) {
320      if (unlikely(is_compute_stage))
321         BEGIN_NV04(push, NV50_CP(BIND_TIC), 1);
322      else
323         BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
324      PUSH_DATA (push, (i << 1) | 0);
325   }
326   if (nv50->num_textures[s]) {
327      if (unlikely(is_compute_stage))
328         BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
329      else
330         BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
331      PUSH_DATA (push, ((NV50_CB_AUX_TEX_MS_OFFSET + 16 * s * 2 * 4) << (8 - 2)) | NV50_CB_AUX);
332      if (unlikely(is_compute_stage))
333         BEGIN_NV04(push, NV50_CP(CB_DATA(0)), nv50->num_textures[s] * 2);
334      else
335         BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2);
336      for (i = 0; i < nv50->num_textures[s]; i++) {
337         struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
338         struct nv50_miptree *res;
339
340         if (!tic || tic->pipe.target == PIPE_BUFFER) {
341            PUSH_DATA (push, 0);
342            PUSH_DATA (push, 0);
343            continue;
344         }
345         res = nv50_miptree(tic->pipe.texture);
346         PUSH_DATA (push, res->ms_x);
347         PUSH_DATA (push, res->ms_y);
348      }
349   }
350   nv50->state.num_textures[s] = nv50->num_textures[s];
351
352   return need_flush;
353}
354
355void nv50_validate_textures(struct nv50_context *nv50)
356{
357   unsigned s;
358   bool need_flush = false;
359
360   for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s)
361      need_flush |= nv50_validate_tic(nv50, s);
362
363   if (need_flush) {
364      BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TIC_FLUSH), 1);
365      PUSH_DATA (nv50->base.pushbuf, 0);
366   }
367
368   /* Invalidate all CP textures because they are aliased. */
369   nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_TEXTURES);
370   nv50->dirty_cp |= NV50_NEW_CP_TEXTURES;
371}
372
/* Validate the sampler-state (TSC) bindings of shader stage @s: upload
 * any TSC entries that have no slot yet, emit BIND_TSC for each bound
 * sampler, and unbind stale slots.  Returns true if a TSC entry was
 * uploaded, in which case the caller must emit a TSC_FLUSH.
 */
bool
nv50_validate_tsc(struct nv50_context *nv50, int s)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned i;
   bool need_flush = false;
   const bool is_compute_stage = s == NV50_SHADER_STAGE_COMPUTE;

   assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS);
   for (i = 0; i < nv50->num_samplers[s]; ++i) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nv50->samplers[s][i]);

      if (!tsc) {
         /* No sampler state at slot i: unbind it (valid bit = 0). */
         if (is_compute_stage)
            BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);
         else
            BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
         PUSH_DATA (push, (i << 4) | 0);
         continue;
      }
      /* NOTE(review): last bound sampler wins for the context-wide
       * seamless_cube_map flag — presumably all samplers agree; confirm. */
      nv50->seamless_cube_map = tsc->seamless_cube_map;
      if (tsc->id < 0) {
         /* Allocate a TSC slot and upload the 32-byte entry; the TSC
          * area starts 65536 bytes into the txc buffer. */
         tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc);

         nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,
                             65536 + tsc->id * 32,
                             NOUVEAU_BO_VRAM, 32, tsc->tsc);
         need_flush = true;
      }
      /* Mark the TSC slot as in use for this validation cycle. */
      nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      if (is_compute_stage)
         BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);
      else
         BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
      PUSH_DATA (push, (tsc->id << 12) | (i << 4) | 1);
   }
   /* Unbind slots the previous state used beyond the new count. */
   for (; i < nv50->state.num_samplers[s]; ++i) {
      if (is_compute_stage)
         BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);
      else
         BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
      PUSH_DATA (push, (i << 4) | 0);
   }
   nv50->state.num_samplers[s] = nv50->num_samplers[s];

   // TXF, in unlinked tsc mode, will always use sampler 0. So we have to
   // ensure that it remains bound. Its contents don't matter, all samplers we
   // ever create have the SRGB_CONVERSION bit set, so as long as the first
   // entry is initialized, we're good to go. This is the only bit that has
   // any effect on what TXF does.
   if (!nv50->samplers[s][0]) {
      if (is_compute_stage)
         BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);
      else
         BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
      PUSH_DATA (push, 1); /* TSC id 0, sampler slot 0, valid bit set */
   }

   return need_flush;
}
434
435void nv50_validate_samplers(struct nv50_context *nv50)
436{
437   unsigned s;
438   bool need_flush = false;
439
440   for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s)
441      need_flush |= nv50_validate_tsc(nv50, s);
442
443   if (need_flush) {
444      if (unlikely(s == NV50_SHADER_STAGE_COMPUTE))
445         // TODO(pmoreau): Is this needed? Not done on nvc0
446         BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1);
447      else
448         BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TSC_FLUSH), 1);
449      PUSH_DATA (nv50->base.pushbuf, 0);
450   }
451
452   /* Invalidate all CP samplers because they are aliased. */
453   nv50->dirty_cp |= NV50_NEW_CP_SAMPLERS;
454}
455
/* There can be up to 4 different MS levels (1, 2, 4, 8). To simplify the
 * shader logic, allow each one to take up 8 offsets.
 */
#define COMBINE(x, y) x, y
#define DUMMY 0, 0
/* Flattened table of (x, y) sample offsets: 8 pairs (16 dwords) per MS
 * level, with the unused tail of each level zero-padded via DUMMY.
 * Uploaded to the AUX constant buffer by nv50_upload_ms_info(). */
static const uint32_t msaa_sample_xy_offsets[] = {
   /* MS1 */
   COMBINE(0, 0),
   DUMMY,
   DUMMY,
   DUMMY,
   DUMMY,
   DUMMY,
   DUMMY,
   DUMMY,

   /* MS2 */
   COMBINE(0, 0),
   COMBINE(1, 0),
   DUMMY,
   DUMMY,
   DUMMY,
   DUMMY,
   DUMMY,
   DUMMY,

   /* MS4 */
   COMBINE(0, 0),
   COMBINE(1, 0),
   COMBINE(0, 1),
   COMBINE(1, 1),
   DUMMY,
   DUMMY,
   DUMMY,
   DUMMY,

   /* MS8 */
   COMBINE(0, 0),
   COMBINE(1, 0),
   COMBINE(0, 1),
   COMBINE(1, 1),
   COMBINE(2, 0),
   COMBINE(3, 0),
   COMBINE(2, 1),
   COMBINE(3, 1),
};
502
/* Upload the MSAA sample (x, y) offset table into the AUX constant
 * buffer at NV50_CB_AUX_MS_OFFSET so shaders can look up per-sample
 * offsets. */
void nv50_upload_ms_info(struct nouveau_pushbuf *push)
{
   BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
   /* CB_ADDR packs the buffer index in the low byte and the write
    * offset (in dwords, hence << (8 - 2)) above it. */
   PUSH_DATA (push, (NV50_CB_AUX_MS_OFFSET << (8 - 2)) | NV50_CB_AUX);
   BEGIN_NI04(push, NV50_3D(CB_DATA(0)), ARRAY_SIZE(msaa_sample_xy_offsets));
   PUSH_DATAp(push, msaa_sample_xy_offsets, ARRAY_SIZE(msaa_sample_xy_offsets));
}
510
/* Initialize TSC slot 0 in the TSC area (at byte offset 65536 of txc)
 * with a minimal valid entry.  TXF in unlinked tsc mode always uses
 * sampler 0, and SRGB_CONVERSION is the only TSC bit that affects TXF
 * (see the comment in nv50_validate_tsc), so this single dword is all
 * slot 0 ever needs. */
void nv50_upload_tsc0(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   /* Entry is 8 dwords; only word 0 is non-zero. */
   u32 data[8] = { G80_TSC_0_SRGB_CONVERSION };
   nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,
                       65536 /* + tsc->id * 32 */,
                       NOUVEAU_BO_VRAM, 32, data);
   BEGIN_NV04(push, NV50_3D(TSC_FLUSH), 1);
   PUSH_DATA (push, 0);
}
521