/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"

#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_query_hw.h"

#include "nvc0/nvc0_compute.xml.h"

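/* Track which shader stages require thread-local storage (TLS). The screen's
 * TLS buffer is referenced in the 3D bufctx when the first stage that needs
 * it shows up, and dropped again once no stage uses it anymore.
 */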
static inline void
nvc0_program_update_context_state(struct nvc0_context *nvc0,
                                  struct nvc0_program *prog, int stage)
{
   if (prog && prog->need_tls) {
      const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR;
      if (!nvc0->state.tls_required)
         BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls);
      nvc0->state.tls_required |= 1 << stage;
   } else {
      if (nvc0->state.tls_required == (1 << stage))
         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS);
      nvc0->state.tls_required &= ~(1 << stage);
   }
}

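/* Lazily translate and upload a program: translation happens on first use,
 * and the code is uploaded to the code segment unless the program has no
 * code at all (in which case it may still carry stream output state).
 */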
static inline bool
nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
   if (prog->mem)
      return true;

   if (!prog->translated) {
      prog->translated = nvc0_program_translate(
         prog, nvc0->screen->base.device->chipset,
         nvc0->screen->base.disk_shader_cache, &nvc0->base.debug);
      if (!prog->translated)
         return false;
   }

   if (likely(prog->code_size))
      return nvc0_program_upload(nvc0, prog);
   return true; /* stream output info only */
}

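/* Point a hardware shader stage at its code. Pre-GV100 classes take an
 * offset into the code segment; GV100 and later take the full virtual
 * address (the code BO's base plus the program's offset).
 */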
void
nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage,
                         struct nvc0_program *prog)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) {
      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);
      PUSH_DATA (push, prog->code_base);
   } else {
      BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2);
      PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base);
      PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base);
   }
}

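/* The SP_SELECT values used below follow a simple pattern: the high nibble
 * is the hardware program type for the stage (1 = VP ... 5 = FP) and the
 * low bit is the enable flag, so e.g. 0x11 enables program type 1 (vertex).
 */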
void
nvc0_vertprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *vp = nvc0->vertprog;

   if (!nvc0_program_validate(nvc0, vp))
      return;
   nvc0_program_update_context_state(nvc0, vp, 0);

   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 1);
   PUSH_DATA (push, 0x11);
   nvc0_program_sp_start_id(nvc0, 1, vp);
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
   PUSH_DATA (push, vp->num_gprs);

   // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
   // PUSH_DATA (push, 0);
}

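/* Some rasterizer state (per-sample interpolation, multisampling, flat
 * shading with explicitly-interpolated colors) is baked into the fragment
 * program by patching it at upload time. When such state changes, the cached
 * code is freed so that the next upload applies the right fixups.
 */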
void
nvc0_fragprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *fp = nvc0->fragprog;
   struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;

   if (fp->fp.force_persample_interp != rast->force_persample_interp) {
      /* Force the program to be reuploaded, which will cause the interp
       * fixups to be applied.
       */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.force_persample_interp = rast->force_persample_interp;
   }

   if (fp->fp.msaa != rast->multisample) {
      /* Force the program to be reuploaded, which will cause the interp
       * fixups to be applied.
       */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.msaa = rast->multisample;
   }

   /* Shade model works well enough when both colors follow it. However, if
    * one (or both) is explicitly set, we have to go the patching route.
    */
   bool has_explicit_color = fp->fp.colors &&
      (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
       ((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
   bool hwflatshade = false;
   if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
      /* Force re-upload */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.flatshade = rast->flatshade;

      /* Always smooth-shade in this mode, the shader will decide on its own
       * when to flat-shade.
       */
   } else if (!has_explicit_color) {
      hwflatshade = rast->flatshade;

      /* No need to binary-patch the shader each time, make sure that it's set
       * up for the default behaviour.
       */
      fp->fp.flatshade = 0;
   }

   if (hwflatshade != nvc0->state.flatshade) {
      nvc0->state.flatshade = hwflatshade;
      BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
      PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
                                     NVC0_3D_SHADE_MODEL_SMOOTH);
   }

   if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG))
      return;

   if (!nvc0_program_validate(nvc0, fp))
      return;
   nvc0_program_update_context_state(nvc0, fp, 4);

   if (fp->fp.early_z != nvc0->state.early_z_forced) {
      nvc0->state.early_z_forced = fp->fp.early_z;
      IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
   }
   if (fp->fp.post_depth_coverage != nvc0->state.post_depth_coverage) {
      nvc0->state.post_depth_coverage = fp->fp.post_depth_coverage;
      IMMED_NVC0(push, NVC0_3D(POST_DEPTH_COVERAGE),
                 fp->fp.post_depth_coverage);
   }

   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 1);
   PUSH_DATA (push, 0x51);
   nvc0_program_sp_start_id(nvc0, 5, fp);
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
   PUSH_DATA (push, fp->num_gprs);

   BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
   PUSH_DATA (push, 0x20164010);
   PUSH_DATA (push, 0x20);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
   PUSH_DATA (push, fp->flags[0]);
}

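/* Hardware stage 2 (tessellation control). When no TCP is bound, or when
 * validation fails, the stage is pointed at the empty TCP program with the
 * enable bit clear (0x20).
 */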
void
nvc0_tctlprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *tp = nvc0->tctlprog;

   if (tp && nvc0_program_validate(nvc0, tp)) {
      if (tp->tp.tess_mode != ~0) {
         BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
         PUSH_DATA (push, tp->tp.tess_mode);
      }
      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
      PUSH_DATA (push, 0x21);
      nvc0_program_sp_start_id(nvc0, 2, tp);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
      PUSH_DATA (push, tp->num_gprs);
   } else {
      tp = nvc0->tcp_empty;
      /* not a whole lot we can do to handle this failure */
      if (!nvc0_program_validate(nvc0, tp))
         assert(!"unable to validate empty tcp");
      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
      PUSH_DATA (push, 0x20);
      nvc0_program_sp_start_id(nvc0, 2, tp);
   }
   nvc0_program_update_context_state(nvc0, tp, 1);
}

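/* Hardware stage 3 (tessellation evaluation). Selection goes through the
 * MACRO_TEP_SELECT pushbuffer macro rather than a plain SP_SELECT write;
 * 0x31 enables the stage, 0x30 disables it.
 */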
void
nvc0_tevlprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *tp = nvc0->tevlprog;

   if (tp && nvc0_program_validate(nvc0, tp)) {
      if (tp->tp.tess_mode != ~0) {
         BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
         PUSH_DATA (push, tp->tp.tess_mode);
      }
      BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
      PUSH_DATA (push, 0x31);
      nvc0_program_sp_start_id(nvc0, 3, tp);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
      PUSH_DATA (push, tp->num_gprs);
   } else {
      BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
      PUSH_DATA (push, 0x30);
   }
   nvc0_program_update_context_state(nvc0, tp, 2);
}

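/* Hardware stage 4 (geometry), selected through the MACRO_GP_SELECT
 * pushbuffer macro: 0x41 enables the stage, 0x40 disables it. The stage is
 * enabled only if the GP actually has code.
 */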
void
nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *gp = nvc0->gmtyprog;

   /* we allow GPs with no code for specifying stream output state only */
   if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) {
      BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
      PUSH_DATA (push, 0x41);
      nvc0_program_sp_start_id(nvc0, 4, gp);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
      PUSH_DATA (push, gp->num_gprs);
   } else {
      BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
      PUSH_DATA (push, 0x40);
   }
   nvc0_program_update_context_state(nvc0, gp, 3);
}

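/* After (re)uploading compute code, the compute engine's code cache has to
 * be flushed so it picks up the new program.
 */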
void
nvc0_compprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;

   if (cp && !nvc0_program_validate(nvc0, cp))
      return;

   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
}

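/* gl_Layer comes from the last enabled geometry-capable stage (GP, then
 * TEP, then VP). Bit 9 of header word 13 is the flag set for programs that
 * write the layer output.
 */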
void
nvc0_layer_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *last;
   bool prog_selects_layer = false;
   bool layer_viewport_relative = false;

   if (nvc0->gmtyprog)
      last = nvc0->gmtyprog;
   else if (nvc0->tevlprog)
      last = nvc0->tevlprog;
   else
      last = nvc0->vertprog;

   if (last) {
      prog_selects_layer = !!(last->hdr[13] & (1 << 9));
      layer_viewport_relative = last->vp.layer_viewport_relative;
   }

   BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
   PUSH_DATA (push, prog_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
   if (nvc0->screen->eng3d->oclass >= GM200_3D_CLASS) {
      IMMED_NVC0(push, NVC0_3D(LAYER_VIEWPORT_RELATIVE),
                 layer_viewport_relative);
   }
}

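/* Validate transform feedback state against the last vertex pipeline
 * stage's stream output info. TFB_VARYING_LOCS packs four byte-sized
 * varying indices per 32-bit word, hence the (count + 3) / 4 word count.
 * Buffers that already contain data (!clean) get their current offset from
 * a query result on the GPU instead of being reset to zero.
 */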
void
nvc0_tfb_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_transform_feedback_state *tfb;
   unsigned b;

   if (nvc0->gmtyprog)
      tfb = nvc0->gmtyprog->tfb;
   else if (nvc0->tevlprog)
      tfb = nvc0->tevlprog->tfb;
   else
      tfb = nvc0->vertprog->tfb;

   IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);

   if (tfb && tfb != nvc0->state.tfb) {
      for (b = 0; b < 4; ++b) {
         if (tfb->varying_count[b]) {
            unsigned n = (tfb->varying_count[b] + 3) / 4;

            BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3);
            PUSH_DATA (push, tfb->stream[b]);
            PUSH_DATA (push, tfb->varying_count[b]);
            PUSH_DATA (push, tfb->stride[b]);
            BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n);
            PUSH_DATAp(push, tfb->varying_index[b], n);

            if (nvc0->tfbbuf[b])
               nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
         } else {
            IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0);
         }
      }
   }
   nvc0->state.tfb = tfb;

   if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS))
      return;

   for (b = 0; b < nvc0->num_tfbbufs; ++b) {
      struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
      struct nv04_resource *buf;

      if (targ && tfb)
         targ->stride = tfb->stride[b];

      if (!targ || !targ->stride) {
         IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
         continue;
      }

      buf = nv04_resource(targ->pipe.buffer);

      BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR);

      if (!(nvc0->tfbbuf_dirty & (1 << b)))
         continue;

      if (!targ->clean)
         nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));
      nouveau_pushbuf_space(push, 0, 0, 1);
      BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
      PUSH_DATA (push, 1);
      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, targ->pipe.buffer_size);
      if (!targ->clean) {
         nvc0_hw_query_pushbuf_submit(push, nvc0_query(targ->pq), 0x4);
      } else {
         PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
         targ->clean = false;
      }
   }
   for (; b < 4; ++b)
      IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
}