1#include "util/u_format.h"
2#include "util/u_framebuffer.h"
3#include "util/u_math.h"
4#include "util/u_viewport.h"
5
6#include "nvc0/nvc0_context.h"
7
#if 0
/* Disabled/experimental: program a zcull (hierarchical depth culling) region
 * placed directly after the depth buffer's miptree allocation.  Never built;
 * kept for reference.
 */
static void
nvc0_validate_zcull(struct nvc0_context *nvc0)
{
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
    struct nv50_surface *sf = nv50_surface(fb->zsbuf);
    struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
    struct nouveau_bo *bo = mt->base.bo;
    uint32_t size;
    /* zcull data lives right after the miptree, 128 KiB aligned */
    uint32_t offset = align(mt->total_size, 1 << 17);
    unsigned width, height;

    /* only single-layer, non-3D depth buffers are handled here */
    assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);

    size = mt->total_size * 2;

    /* zcull region dimensions: height rounded up to 32, width to 224 */
    height = align(fb->height, 32);
    width = fb->width % 224;
    if (width)
       width = fb->width + (224 - width);
    else
       width = fb->width;

    BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1);
    PUSH_DATA (push, 0);
    BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2);
    PUSH_DATAh(push, bo->offset + offset);
    PUSH_DATA (push, bo->offset + offset);
    offset += 1 << 17;
    BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2);
    PUSH_DATAh(push, bo->offset + offset);
    PUSH_DATA (push, bo->offset + offset);
    /* unnamed methods: region size, split across two words */
    BEGIN_NVC0(push, SUBC_3D(0x07e0), 2);
    PUSH_DATA (push, size);
    PUSH_DATA (push, size >> 16);
    BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */
    PUSH_DATA (push, 2);
    BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4);
    PUSH_DATA (push, width);
    PUSH_DATA (push, height);
    PUSH_DATA (push, 1);
    PUSH_DATA (push, 0);
    BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2);
    PUSH_DATA (push, 0);
    PUSH_DATA (push, 0);
    BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1);
    PUSH_DATA (push, 0);
}
#endif
58
/* Program render target slot @i as a "null" RT: zero address, zero height,
 * format 0.  Used for unbound cbuf slots and for the no-attachment FB case
 * (where @layers carries fb->layers so layered rendering still works).
 */
static inline void
nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
{
   BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 64);     // width
   PUSH_DATA (push, 0);      // height
   PUSH_DATA (push, 0);      // format
   PUSH_DATA (push, 0);      // tile mode
   PUSH_DATA (push, layers); // layers
   PUSH_DATA (push, 0);      // layer stride
   PUSH_DATA (push, 0);      // base layer
}
73
74static uint32_t
75gm200_encode_cb_sample_location(uint8_t x, uint8_t y)
76{
77   static const uint8_t lut[] = {
78      0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
79      0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
80   uint32_t result = 0;
81   /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
82   result |= lut[x] << 8 | lut[y] << 24;
83   /* fill in gaps with data in a representation for SV_SAMPLE_POS */
84   result |= x << 12 | y << 28;
85   return result;
86}
87
/* GM200+ programmable sample locations: upload either the app-supplied
 * locations or the default ones, both into the aux constbuf (for shader
 * reads of sample positions) and into the hardware's packed sample
 * position registers (method 0x11e0).
 */
static void
gm200_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned grid_width, grid_height, hw_grid_width;
   uint8_t sample_locations[16][2];
   unsigned cb[64];
   unsigned i, pixel, pixel_y, pixel_x, sample;
   uint32_t packed_locations[4] = {};

   /* Ask the screen how big the repeating pixel grid is for this sample
    * count (locations vary per pixel within the grid). */
   screen->base.base.get_sample_pixel_grid(
      &screen->base.base, ms, &grid_width, &grid_height);

   hw_grid_width = grid_width;
   if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
      hw_grid_width = 4;

   if (nvc0->sample_locations_enabled) {
      uint8_t locations[2 * 4 * 8];
      memcpy(locations, nvc0->sample_locations, sizeof(locations));
      /* Gallium supplies top-left-origin locations; flip for the GL
       * bottom-left convention used by the hardware. */
      util_sample_locations_flip_y(
         &screen->base.base, nvc0->framebuffer.height, ms, locations);

      /* Expand the user grid into the hardware's hw_grid_width-wide
       * layout; each byte holds x in the low nibble, y in the high one. */
      for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) {
         for (sample = 0; sample < ms; sample++) {
            /* NOTE: these intentionally shadow the function-scope
             * pixel_x/pixel_y used by the cb loop below. */
            unsigned pixel_x = pixel % hw_grid_width;
            unsigned pixel_y = pixel / hw_grid_width;
            unsigned wi = pixel * ms + sample;
            unsigned ri = (pixel_y * grid_width + pixel_x % grid_width);
            ri = ri * ms + sample;
            sample_locations[wi][0] = locations[ri] & 0xf;
            /* NOTE(review): if the y nibble is 0 this stores 16, which
             * gm200_encode_cb_sample_location() then uses to index a
             * 16-entry LUT -- confirm y == 0 cannot occur here. */
            sample_locations[wi][1] = 16 - (locations[ri] >> 4);
         }
      }
   } else {
      /* Default fixed sample positions, replicated to fill 16 entries. */
      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
      for (i = 0; i < 16; i++) {
         sample_locations[i][0] = ptr[i % ms][0];
         sample_locations[i][1] = ptr[i % ms][1];
      }
   }

   /* Upload the per-pixel, per-sample encodings into the fragment-stage
    * aux constbuf (stage 4). */
   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 64);
   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
   /* The cb holds a fixed 2x4 pixel grid with 8 sample slots per pixel,
    * tiling the (possibly smaller) source grid. */
   for (pixel_y = 0; pixel_y < 4; pixel_y++) {
      for (pixel_x = 0; pixel_x < 2; pixel_x++) {
         for (sample = 0; sample < ms; sample++) {
            unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample;
            unsigned read_index = pixel_y % grid_height * hw_grid_width;
            uint8_t x, y;
            read_index += pixel_x % grid_width;
            read_index = read_index * ms + sample;
            x = sample_locations[read_index][0];
            y = sample_locations[read_index][1];
            cb[write_index] = gm200_encode_cb_sample_location(x, y);
         }
      }
   }
   PUSH_DATAp(push, cb, 64);

   /* Pack all 16 locations as nibble pairs (x low, y high) into four
    * 32-bit words for the hardware sample position registers. */
   for (i = 0; i < 16; i++) {
      packed_locations[i / 4] |= sample_locations[i][0] << ((i % 4) * 8);
      packed_locations[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4);
   }

   BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
   PUSH_DATAp(push, packed_locations, 4);
}
161
/* Pre-GM200 path: sample locations are not programmable, so just upload the
 * fixed per-sample positions (as float pairs) into the fragment-stage aux
 * constbuf for shaders that read gl_SamplePosition.
 */
static void
nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned i;

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
   for (i = 0; i < ms; i++) {
      float xy[2];
      /* Query the canonical position for sample i at this sample count. */
      nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
      PUSH_DATAf(push, xy[0]);
      PUSH_DATAf(push, xy[1]);
   }
}
182
183static void
184validate_sample_locations(struct nvc0_context *nvc0)
185{
186   unsigned ms = util_framebuffer_get_num_samples(&nvc0->framebuffer);
187
188   if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
189      gm200_validate_sample_locations(nvc0, ms);
190   else
191      nvc0_validate_sample_locations(nvc0, ms);
192}
193
/* (Re)program the framebuffer state: screen scissor, all color RTs, the
 * zeta (depth/stencil) surface, the resulting multisample mode, and the
 * RT mapping/count.  Also tracks GPU read/write status on the attached
 * resources so a SERIALIZE is inserted when a previously-read surface is
 * now rendered to.
 */
static void
nvc0_validate_fb(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   unsigned i;
   unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
   unsigned nr_cbufs = fb->nr_cbufs;
   bool serialize = false;

   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);

   /* Screen scissor holds width/height in the high halves, min 0. */
   BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
   PUSH_DATA (push, fb->width << 16);
   PUSH_DATA (push, fb->height << 16);

   for (i = 0; i < fb->nr_cbufs; ++i) {
      struct nv50_surface *sf;
      struct nv04_resource *res;
      struct nouveau_bo *bo;

      if (!fb->cbufs[i]) {
         nvc0_fb_set_null_rt(push, i, 0);
         continue;
      }

      sf = nv50_surface(fb->cbufs[i]);
      res = nv04_resource(sf->base.texture);
      bo = res->bo;

      BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
      PUSH_DATAh(push, res->address + sf->offset);
      PUSH_DATA (push, res->address + sf->offset);
      if (likely(nouveau_bo_memtype(bo))) {
         /* Tiled surface: program dimensions, tile mode and layering from
          * the miptree level info. */
         struct nv50_miptree *mt = nv50_miptree(sf->base.texture);

         assert(sf->base.texture->target != PIPE_BUFFER);

         PUSH_DATA(push, sf->width);
         PUSH_DATA(push, sf->height);
         PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
         PUSH_DATA(push, (mt->layout_3d << 16) |
                          mt->level[sf->base.u.tex.level].tile_mode);
         PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth);
         PUSH_DATA(push, mt->layer_stride >> 2);
         PUSH_DATA(push, sf->base.u.tex.first_layer);

         ms_mode = mt->ms_mode;
      } else {
         /* Linear (pitch) surface: buffers get a fixed 256 KiB pitch-RT
          * layout, linear textures use their level-0 pitch. */
         if (res->base.target == PIPE_BUFFER) {
            PUSH_DATA(push, 262144);
            PUSH_DATA(push, 1);
         } else {
            PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch);
            PUSH_DATA(push, sf->height);
         }
         PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
         PUSH_DATA(push, 1 << 12);
         PUSH_DATA(push, 1);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);

         nvc0_resource_fence(res, NOUVEAU_BO_WR);

         /* linear RTs cannot be combined with a depth buffer */
         assert(!fb->zsbuf);
      }

      /* Switching a resource from "read by GPU" to "written by GPU"
       * requires flushing in-flight reads first. */
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
         serialize = true;
      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;

      /* only register for writing, otherwise we'd always serialize here */
      BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR);
   }

   if (fb->zsbuf) {
      struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
      struct nv50_surface *sf = nv50_surface(fb->zsbuf);
      int unk = mt->base.base.target == PIPE_TEXTURE_2D;

      BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
      PUSH_DATAh(push, mt->base.address + sf->offset);
      PUSH_DATA (push, mt->base.address + sf->offset);
      PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt);
      PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
      PUSH_DATA (push, mt->layer_stride >> 2);
      BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
      PUSH_DATA (push, 1);
      BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
      PUSH_DATA (push, sf->width);
      PUSH_DATA (push, sf->height);
      PUSH_DATA (push, (unk << 16) |
                (sf->base.u.tex.first_layer + sf->depth));
      BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
      PUSH_DATA (push, sf->base.u.tex.first_layer);

      ms_mode = mt->ms_mode;

      /* Same read->write transition tracking as for the color RTs. */
      if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
         serialize = true;
      mt->base.status |=  NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;

      BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR);
   } else {
       BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
      PUSH_DATA (push, 0);
   }

   /* No-attachment framebuffer: bind one null RT carrying the layer count
    * and derive the MS mode from fb->samples (log2). */
   if (nr_cbufs == 0 && !fb->zsbuf) {
      assert(util_is_power_of_two_or_zero(fb->samples));
      assert(fb->samples <= 8);

      nvc0_fb_set_null_rt(push, 0, fb->layers);

      if (fb->samples > 1)
         ms_mode = ffs(fb->samples) - 1;
      nr_cbufs = 1;
   }

   /* Octal 076543210: identity RT index mapping, 3 bits per RT slot;
    * low nibble holds the RT count. */
   BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
   PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
   IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);

   if (serialize)
      IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);

   NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize);
}
324
325static void
326nvc0_validate_blend_colour(struct nvc0_context *nvc0)
327{
328   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
329
330   BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4);
331   PUSH_DATAf(push, nvc0->blend_colour.color[0]);
332   PUSH_DATAf(push, nvc0->blend_colour.color[1]);
333   PUSH_DATAf(push, nvc0->blend_colour.color[2]);
334   PUSH_DATAf(push, nvc0->blend_colour.color[3]);
335}
336
337static void
338nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
339{
340    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
341    const ubyte *ref = &nvc0->stencil_ref.ref_value[0];
342
343    IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]);
344    IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]);
345}
346
347static void
348nvc0_validate_stipple(struct nvc0_context *nvc0)
349{
350    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
351    unsigned i;
352
353    BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
354    for (i = 0; i < 32; ++i)
355        PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i]));
356}
357
/* Program the per-viewport scissor rectangles.  When the rasterizer's
 * scissor enable toggles, all viewports' scissors must be reprogrammed
 * (the enable is baked into the rectangle: disabled = full range).
 */
static void
nvc0_validate_scissor(struct nvc0_context *nvc0)
{
   int i;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   /* Nothing to do if neither the rects nor the enable state changed. */
   if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) &&
      nvc0->rast->pipe.scissor == nvc0->state.scissor)
      return;

   /* Enable flipped: every viewport's scissor rectangle is now stale. */
   if (nvc0->state.scissor != nvc0->rast->pipe.scissor)
      nvc0->scissors_dirty = (1 << NVC0_MAX_VIEWPORTS) - 1;

   nvc0->state.scissor = nvc0->rast->pipe.scissor;

   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
      struct pipe_scissor_state *s = &nvc0->scissors[i];
      if (!(nvc0->scissors_dirty & (1 << i)))
         continue;

      BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(i)), 2);
      if (nvc0->rast->pipe.scissor) {
         PUSH_DATA(push, (s->maxx << 16) | s->minx);
         PUSH_DATA(push, (s->maxy << 16) | s->miny);
      } else {
         /* scissor test disabled: open the rectangle fully */
         PUSH_DATA(push, (0xffff << 16) | 0);
         PUSH_DATA(push, (0xffff << 16) | 0);
      }
   }
   nvc0->scissors_dirty = 0;
}
389
/* Program translate/scale, clip rectangle and depth range for each dirty
 * viewport.
 */
static void
nvc0_validate_viewport(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   int x, y, w, h, i;
   float zmin, zmax;

   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
      struct pipe_viewport_state *vp = &nvc0->viewports[i];

      if (!(nvc0->viewports_dirty & (1 << i)))
         continue;

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(i)), 3);
      PUSH_DATAf(push, vp->translate[0]);
      PUSH_DATAf(push, vp->translate[1]);
      PUSH_DATAf(push, vp->translate[2]);

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(i)), 3);
      PUSH_DATAf(push, vp->scale[0]);
      PUSH_DATAf(push, vp->scale[1]);
      PUSH_DATAf(push, vp->scale[2]);

      /* now set the viewport rectangle to viewport dimensions for clipping */

      /* derive the screen-space rect from translate +/- |scale|, clamped
       * at 0 on the near side */
      x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0])));
      y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1])));
      w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x;
      h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y;

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(i)), 2);
      PUSH_DATA (push, (w << 16) | x);
      PUSH_DATA (push, (h << 16) | y);

      /* If the halfz setting ever changes, the viewports will also get
       * updated. The rast will get updated before the validate function has a
       * chance to hit, so we can just use it directly without an atom
       * dependency.
       */
      util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax);

      BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
      PUSH_DATAf(push, zmin);
      PUSH_DATAf(push, zmax);
   }
   nvc0->viewports_dirty = 0;
}
437
/* Program the window (clip) rectangles.  Rectangles are only meaningful in
 * exclusive mode when there is at least one rect; inclusive mode with zero
 * rects means "clip everything", hence the enable condition below.
 */
static void
nvc0_validate_window_rects(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   bool enable = nvc0->window_rect.rects > 0 || nvc0->window_rect.inclusive;
   int i;

   IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_EN), enable);
   if (!enable)
      return;

   /* hardware mode 1 = exclusive, 0 = inclusive */
   IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), !nvc0->window_rect.inclusive);
   BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), NVC0_MAX_WINDOW_RECTANGLES * 2);
   for (i = 0; i < nvc0->window_rect.rects; i++) {
      struct pipe_scissor_state *s = &nvc0->window_rect.rect[i];
      PUSH_DATA(push, (s->maxx << 16) | s->minx);
      PUSH_DATA(push, (s->maxy << 16) | s->miny);
   }
   /* zero out the unused rect slots (empty rectangles) */
   for (; i < NVC0_MAX_WINDOW_RECTANGLES; i++) {
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0);
   }
}
461
/* Upload all user clip planes into stage @s's aux constbuf so the last
 * vertex-processing shader stage can apply them.
 */
static inline void
nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
   PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO);
   PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
}
476
/* Ensure the last vertex stage's program @vp supports at least as many user
 * clip planes as @mask requires; if not, destroy it and re-validate so it
 * is recompiled with the larger num_ucps.
 */
static inline void
nvc0_check_program_ucps(struct nvc0_context *nvc0,
                        struct nvc0_program *vp, uint8_t mask)
{
   /* highest enabled plane index + 1 */
   const unsigned n = util_logbase2(mask) + 1;

   if (vp->vp.num_ucps >= n)
      return;
   /* drop the compiled code; the validate below rebuilds it */
   nvc0_program_destroy(nvc0, vp);

   vp->vp.num_ucps = n;
   if (likely(vp == nvc0->vertprog))
      nvc0_vertprog_validate(nvc0);
   else
   if (likely(vp == nvc0->gmtyprog))
      nvc0_gmtyprog_validate(nvc0);
   else
      nvc0_tevlprog_validate(nvc0);
}
496
/* Validate user clip plane state: pick the last enabled vertex-processing
 * stage, make sure its program can emit the required clip distances, upload
 * the plane equations, and update the hardware clip enable/mode methods.
 */
static void
nvc0_validate_clip(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *vp;
   unsigned stage;
   uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable;

   /* the last stage before rasterization owns clip distance output */
   if (nvc0->gmtyprog) {
      stage = 3;
      vp = nvc0->gmtyprog;
   } else
   if (nvc0->tevlprog) {
      stage = 2;
      vp = nvc0->tevlprog;
   } else {
      stage = 0;
      vp = nvc0->vertprog;
   }

   if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
      nvc0_check_program_ucps(nvc0, vp, clip_enable);

   /* re-upload plane data when either the planes or the stage's program
    * changed */
   if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage)))
      if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
         nvc0_upload_uclip_planes(nvc0, stage);

   /* only planes the program actually writes can be enabled; culled
    * distances are always enabled */
   clip_enable &= vp->vp.clip_enable;
   clip_enable |= vp->vp.cull_enable;

   if (nvc0->state.clip_enable != clip_enable) {
      nvc0->state.clip_enable = clip_enable;
      IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable);
   }
   if (nvc0->state.clip_mode != vp->vp.clip_mode) {
      nvc0->state.clip_mode = vp->vp.clip_mode;
      BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1);
      PUSH_DATA (push, vp->vp.clip_mode);
   }
}
537
538static void
539nvc0_validate_blend(struct nvc0_context *nvc0)
540{
541   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
542
543   PUSH_SPACE(push, nvc0->blend->size);
544   PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size);
545}
546
547static void
548nvc0_validate_zsa(struct nvc0_context *nvc0)
549{
550   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
551
552   PUSH_SPACE(push, nvc0->zsa->size);
553   PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size);
554}
555
556static void
557nvc0_validate_rasterizer(struct nvc0_context *nvc0)
558{
559   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
560
561   PUSH_SPACE(push, nvc0->rast->size);
562   PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size);
563}
564
/* Rebind all dirty constant buffers for the 5 graphics shader stages.
 * Slot 0 with user data is the "OpenGL uniforms" buffer, streamed into a
 * fixed region of the screen's uniform_bo; other slots are real UBOs bound
 * at their resource address.
 */
static void
nvc0_constbufs_validate(struct nvc0_context *nvc0)
{
   unsigned s;

   /* bind_cb_3d may avoid a SERIALIZE between consecutive rebinds */
   bool can_serialize = true;

   for (s = 0; s < 5; ++s) {
      while (nvc0->constbuf_dirty[s]) {
         int i = ffs(nvc0->constbuf_dirty[s]) - 1;
         nvc0->constbuf_dirty[s] &= ~(1 << i);

         if (nvc0->constbuf[s][i].user) {
            struct nouveau_bo *bo = nvc0->screen->uniform_bo;
            const unsigned base = NVC0_CB_USR_INFO(s);
            const unsigned size = nvc0->constbuf[s][0].size;
            assert(i == 0); /* we really only want OpenGL uniforms here */
            assert(nvc0->constbuf[s][0].u.data);

            /* bind the fixed user-uniform region once; afterwards only the
             * data needs streaming */
            if (!nvc0->state.uniform_buffer_bound[s]) {
               nvc0->state.uniform_buffer_bound[s] = true;

               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
                                      NVC0_MAX_CONSTBUF_SIZE, bo->offset + base);
            }
            nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                         base, NVC0_MAX_CONSTBUF_SIZE,
                         0, (size + 3) / 4,
                         nvc0->constbuf[s][0].u.data);
         } else {
            struct nv04_resource *res =
               nv04_resource(nvc0->constbuf[s][i].u.buf);
            if (res) {
               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
                                      nvc0->constbuf[s][i].size,
                                      res->address + nvc0->constbuf[s][i].offset);

               BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);

               nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
               res->cb_bindings[s] |= 1 << i;

               /* slot 0 no longer holds the user-uniform region */
               if (i == 0)
                  nvc0->state.uniform_buffer_bound[s] = false;
            } else if (i != 0) {
               /* unbind the slot (size -1) */
               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, -1, 0);
            }
         }
      }
   }

   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
      /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
      nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
      nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
      nvc0->state.uniform_buffer_bound[5] = false;
   }
}
623
/* Upload shader buffer (SSBO) descriptors -- address, size -- into each
 * stage's aux constbuf and reference the backing resources for read/write.
 */
static void
nvc0_validate_buffers(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   int i, s;

   for (s = 0; s < 5; s++) {
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
      PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
      for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
         if (nvc0->buffers[s][i].buffer) {
            struct nv04_resource *res =
               nv04_resource(nvc0->buffers[s][i].buffer);
            /* descriptor layout: address lo, address hi, size, pad */
            PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
            PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
            PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
            PUSH_DATA (push, 0);
            BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR);
            /* mark the bound range as initialized/valid */
            util_range_add(&res->valid_buffer_range,
                           nvc0->buffers[s][i].buffer_offset,
                           nvc0->buffers[s][i].buffer_offset +
                           nvc0->buffers[s][i].buffer_size);
         } else {
            /* empty slot: null descriptor */
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
         }
      }
   }

}
661
662static void
663nvc0_validate_sample_mask(struct nvc0_context *nvc0)
664{
665   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
666
667   unsigned mask[4] =
668   {
669      nvc0->sample_mask & 0xffff,
670      nvc0->sample_mask & 0xffff,
671      nvc0->sample_mask & 0xffff,
672      nvc0->sample_mask & 0xffff
673   };
674
675   BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
676   PUSH_DATA (push, mask[0]);
677   PUSH_DATA (push, mask[1]);
678   PUSH_DATA (push, mask[2]);
679   PUSH_DATA (push, mask[3]);
680}
681
/* Program the sample-shading rate from the minimum sample count requested
 * by the state tracker, forcing full per-sample shading where required for
 * correctness.
 */
static void
nvc0_validate_min_samples(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   int samples;

   /* the hardware only supports power-of-two shading rates */
   samples = util_next_power_of_two(nvc0->min_samples);
   if (samples > 1) {
      // If we're using the incoming sample mask and doing sample shading, we
      // have to do sample shading "to the max", otherwise there's no way to
      // tell which sets of samples are covered by the current invocation.
      // Similarly for reading the framebuffer.
      if (nvc0->fragprog && (
                nvc0->fragprog->fp.sample_mask_in ||
                nvc0->fragprog->fp.reads_framebuffer))
         samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
      samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
   }

   IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples);
}
703
/* Rebind the driver's auxiliary constbuf (slot 15) for every graphics
 * stage, and flag the compute side so it rebinds its copy too.
 */
static void
nvc0_validate_driverconst(struct nvc0_context *nvc0)
{
   struct nvc0_screen *screen = nvc0->screen;
   int i;

   for (i = 0; i < 5; ++i)
      nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
                             screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));

   nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
}
716
/* Decide whether rasterization can be turned off entirely: either the
 * rasterizer explicitly requests discard, or nothing downstream consumes
 * fragments (no depth/stencil test and no fragment program output).
 */
static void
nvc0_validate_fp_zsa_rast(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   bool rasterizer_discard;

   if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
      rasterizer_discard = true;
   } else {
      bool zs = nvc0->zsa &&
         (nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled);
      /* NOTE(review): hdr[18] appears to encode whether the FP writes any
       * color outputs -- confirm against the shader header layout. */
      rasterizer_discard = !zs &&
         (!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
   }

   if (rasterizer_discard != nvc0->state.rasterizer_discard) {
      nvc0->state.rasterizer_discard = rasterizer_discard;
      IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard);
   }
}
737
/* alpha test is disabled if there are no color RTs, so make sure we have at
 * least one if alpha test is enabled. Note that this must run after
 * nvc0_validate_fb, otherwise that will override the RT count setting.
 */
static void
nvc0_validate_zsa_fb(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (nvc0->zsa && nvc0->zsa->pipe.alpha.enabled &&
       nvc0->framebuffer.zsbuf &&
       nvc0->framebuffer.nr_cbufs == 0) {
      /* bind one dummy RT so the alpha test still runs */
      nvc0_fb_set_null_rt(push, 0, 0);
      /* octal 076543210: identity RT mapping; low nibble = 1 RT */
      BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
      PUSH_DATA (push, (076543210 << 4) | 1);
   }
}
755
756static void
757nvc0_validate_rast_fb(struct nvc0_context *nvc0)
758{
759   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
760   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
761   struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
762
763   if (!rast)
764      return;
765
766   if (rast->offset_units_unscaled) {
767      BEGIN_NVC0(push, NVC0_3D(POLYGON_OFFSET_UNITS), 1);
768      if (fb->zsbuf && fb->zsbuf->format == PIPE_FORMAT_Z16_UNORM)
769         PUSH_DATAf(push, rast->offset_units * (1 << 16));
770      else
771         PUSH_DATAf(push, rast->offset_units * (1 << 24));
772   }
773}
774
775
776static void
777nvc0_validate_tess_state(struct nvc0_context *nvc0)
778{
779   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
780
781   BEGIN_NVC0(push, NVC0_3D(TESS_LEVEL_OUTER(0)), 6);
782   PUSH_DATAp(push, nvc0->default_tess_outer, 4);
783   PUSH_DATAp(push, nvc0->default_tess_inner, 2);
784}
785
786/* If we have a frag shader bound which tries to read from the framebuffer, we
787 * have to make sure that the fb is bound as a texture in the expected
788 * location. For Fermi, that's in the special driver slot 16, while for Kepler
789 * it's a regular binding stored in the driver constbuf.
790 */
static void
nvc0_validate_fbread(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct pipe_sampler_view *old_view = nvc0->fbtexture;
   struct pipe_sampler_view *new_view = NULL;

   if (nvc0->fragprog &&
       nvc0->fragprog->fp.reads_framebuffer &&
       nvc0->framebuffer.nr_cbufs &&
       nvc0->framebuffer.cbufs[0]) {
      /* NOTE(review): tmpl is only partially initialized below; confirm
       * create_sampler_view() never reads the remaining fields. */
      struct pipe_sampler_view tmpl;
      struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];

      tmpl.target = PIPE_TEXTURE_2D_ARRAY;
      tmpl.format = sf->format;
      tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level;
      tmpl.u.tex.first_layer = sf->u.tex.first_layer;
      tmpl.u.tex.last_layer = sf->u.tex.last_layer;
      tmpl.swizzle_r = PIPE_SWIZZLE_X;
      tmpl.swizzle_g = PIPE_SWIZZLE_Y;
      tmpl.swizzle_b = PIPE_SWIZZLE_Z;
      tmpl.swizzle_a = PIPE_SWIZZLE_W;

      /* Bail if it's the same parameters */
      if (old_view && old_view->texture == sf->texture &&
          old_view->format == sf->format &&
          old_view->u.tex.first_level == sf->u.tex.level &&
          old_view->u.tex.first_layer == sf->u.tex.first_layer &&
          old_view->u.tex.last_layer == sf->u.tex.last_layer)
         return;

      new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl);
   } else if (old_view == NULL) {
      /* nothing bound before and nothing to bind now */
      return;
   }

   /* drop the previous view and install the new one (possibly NULL) */
   if (old_view)
      pipe_sampler_view_reference(&nvc0->fbtexture, NULL);
   nvc0->fbtexture = new_view;

   if (new_view) {
      /* allocate a TIC slot and upload the texture descriptor */
      struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
      assert(tic->id < 0);
      tic->id = nvc0_screen_tic_alloc(screen, tic);
      nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32,
                           NV_VRAM_DOMAIN(&screen->base), 32, tic->tic);
      screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         /* Kepler+: bindless-style handle stored in the aux constbuf */
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
         PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
         PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
         PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
         PUSH_DATA (push, (0 << 20) | tic->id);
      } else {
         /* Fermi: bind into the dedicated driver texture slot */
         BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
         PUSH_DATA (push, (tic->id << 9) | 1);
      }

      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
   }
}
858
/* Make @ctx_to the screen's current context: inherit the hardware state
 * image from the previous context (or the screen's saved state) and mark
 * everything dirty that the new context can actually re-emit.
 */
static void
nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
{
   struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx;
   unsigned s;

   if (ctx_from)
      ctx_to->state = ctx_from->state;
   else
      ctx_to->state = ctx_to->screen->save_state;

   ctx_to->dirty_3d = ~0;
   ctx_to->dirty_cp = ~0;
   ctx_to->viewports_dirty = ~0;
   ctx_to->scissors_dirty = ~0;

   for (s = 0; s < 6; ++s) {
      ctx_to->samplers_dirty[s] = ~0;
      ctx_to->textures_dirty[s] = ~0;
      ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
      ctx_to->buffers_dirty[s]  = ~0;
      ctx_to->images_dirty[s]   = ~0;
   }

   /* Reset tfb as the shader that owns it may have been deleted. */
   ctx_to->state.tfb = NULL;

   /* clear dirty bits for state objects the new context doesn't have,
    * so validation doesn't dereference NULL CSOs */
   if (!ctx_to->vertex)
      ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);

   if (!ctx_to->vertprog)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG;
   if (!ctx_to->fragprog)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG;

   if (!ctx_to->blend)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND;
   if (!ctx_to->rast)
      ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR);
   if (!ctx_to->zsa)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA;

   ctx_to->screen->cur_ctx = ctx_to;
}
903
/* Table of 3D state-validation handlers and the NVC0_NEW_3D_* dirty bits
 * that trigger each one.  nvc0_state_validate() walks this table in array
 * order and calls every entry whose bits intersect the dirty mask, so keep
 * the existing ordering when adding entries — later handlers may depend on
 * state emitted by earlier ones (e.g. several entries re-run on
 * NVC0_NEW_3D_FRAMEBUFFER after nvc0_validate_fb).
 */
static struct nvc0_state_validate
validate_list_3d[] = {
    { nvc0_validate_fb,            NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_blend,         NVC0_NEW_3D_BLEND },
    { nvc0_validate_zsa,           NVC0_NEW_3D_ZSA },
    { nvc0_validate_sample_mask,   NVC0_NEW_3D_SAMPLE_MASK },
    { nvc0_validate_rasterizer,    NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_blend_colour,  NVC0_NEW_3D_BLEND_COLOUR },
    { nvc0_validate_stencil_ref,   NVC0_NEW_3D_STENCIL_REF },
    { nvc0_validate_stipple,       NVC0_NEW_3D_STIPPLE },
    { nvc0_validate_scissor,       NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_viewport,      NVC0_NEW_3D_VIEWPORT },
    { nvc0_validate_window_rects,  NVC0_NEW_3D_WINDOW_RECTS },
    { nvc0_vertprog_validate,      NVC0_NEW_3D_VERTPROG },
    { nvc0_tctlprog_validate,      NVC0_NEW_3D_TCTLPROG },
    { nvc0_tevlprog_validate,      NVC0_NEW_3D_TEVLPROG },
    { nvc0_validate_tess_state,    NVC0_NEW_3D_TESSFACTOR },
    { nvc0_gmtyprog_validate,      NVC0_NEW_3D_GMTYPROG },
    { nvc0_validate_min_samples,   NVC0_NEW_3D_MIN_SAMPLES |
                                   NVC0_NEW_3D_FRAGPROG |
                                   NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_fragprog_validate,      NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_fp_zsa_rast,   NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA |
                                   NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_zsa_fb,        NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_rast_fb,       NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_clip,          NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER |
                                   NVC0_NEW_3D_VERTPROG |
                                   NVC0_NEW_3D_TEVLPROG |
                                   NVC0_NEW_3D_GMTYPROG },
    { nvc0_constbufs_validate,     NVC0_NEW_3D_CONSTBUF },
    { nvc0_validate_textures,      NVC0_NEW_3D_TEXTURES },
    { nvc0_validate_samplers,      NVC0_NEW_3D_SAMPLERS },
    { nve4_set_tex_handles,        NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
    { nvc0_validate_fbread,        NVC0_NEW_3D_FRAGPROG |
                                   NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
    { nvc0_validate_surfaces,      NVC0_NEW_3D_SURFACES },
    { nvc0_validate_buffers,       NVC0_NEW_3D_BUFFERS },
    { nvc0_tfb_validate,           NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG },
    { nvc0_layer_validate,         NVC0_NEW_3D_VERTPROG |
                                   NVC0_NEW_3D_TEVLPROG |
                                   NVC0_NEW_3D_GMTYPROG },
    { nvc0_validate_driverconst,   NVC0_NEW_3D_DRIVERCONST },
    { validate_sample_locations,   NVC0_NEW_3D_SAMPLE_LOCATIONS |
                                   NVC0_NEW_3D_FRAMEBUFFER},
};
951
952bool
953nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask,
954                    struct nvc0_state_validate *validate_list, int size,
955                    uint32_t *dirty, struct nouveau_bufctx *bufctx)
956{
957   uint32_t state_mask;
958   int ret;
959   unsigned i;
960
961   if (nvc0->screen->cur_ctx != nvc0)
962      nvc0_switch_pipe_context(nvc0);
963
964   state_mask = *dirty & mask;
965
966   if (state_mask) {
967      for (i = 0; i < size; ++i) {
968         struct nvc0_state_validate *validate = &validate_list[i];
969
970         if (state_mask & validate->states)
971            validate->func(nvc0);
972      }
973      *dirty &= ~state_mask;
974
975      nvc0_bufctx_fence(nvc0, bufctx, false);
976   }
977
978   nouveau_pushbuf_bufctx(nvc0->base.pushbuf, bufctx);
979   ret = nouveau_pushbuf_validate(nvc0->base.pushbuf);
980
981   return !ret;
982}
983
984bool
985nvc0_state_validate_3d(struct nvc0_context *nvc0, uint32_t mask)
986{
987   bool ret;
988
989   ret = nvc0_state_validate(nvc0, mask, validate_list_3d,
990                             ARRAY_SIZE(validate_list_3d), &nvc0->dirty_3d,
991                             nvc0->bufctx_3d);
992
993   if (unlikely(nvc0->state.flushed)) {
994      nvc0->state.flushed = false;
995      nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, true);
996   }
997   return ret;
998}
999