1/*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include <xf86drm.h>
24#include <nouveau_drm.h>
25#include <nvif/class.h>
26#include "util/u_format.h"
27#include "util/u_format_s3tc.h"
28#include "util/u_screen.h"
29#include "pipe/p_screen.h"
30#include "compiler/nir/nir.h"
31
32#include "nouveau_vp3_video.h"
33
34#include "nvc0/nvc0_context.h"
35#include "nvc0/nvc0_screen.h"
36
37#include "nvc0/mme/com9097.mme.h"
38#include "nvc0/mme/com90c0.mme.h"
39
40#include "nv50/g80_texture.xml.h"
41
42static boolean
43nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
44                                enum pipe_format format,
45                                enum pipe_texture_target target,
46                                unsigned sample_count,
47                                unsigned storage_sample_count,
48                                unsigned bindings)
49{
50   const struct util_format_description *desc = util_format_description(format);
51
52   if (sample_count > 8)
53      return false;
54   if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
55      return false;
56
57   if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
58      return false;
59
60   /* Short-circuit the rest of the logic -- this is used by the state tracker
61    * to determine valid MS levels in a no-attachments scenario.
62    */
63   if (format == PIPE_FORMAT_NONE && bindings & PIPE_BIND_RENDER_TARGET)
64      return true;
65
66   if ((bindings & PIPE_BIND_SAMPLER_VIEW) && (target != PIPE_BUFFER))
67      if (util_format_get_blocksizebits(format) == 3 * 32)
68         return false;
69
70   if (bindings & PIPE_BIND_LINEAR)
71      if (util_format_is_depth_or_stencil(format) ||
72          (target != PIPE_TEXTURE_1D &&
73           target != PIPE_TEXTURE_2D &&
74           target != PIPE_TEXTURE_RECT) ||
75          sample_count > 1)
76         return false;
77
78   /* Restrict ETC2 and ASTC formats here. These are only supported on GK20A.
79    */
80   if ((desc->layout == UTIL_FORMAT_LAYOUT_ETC ||
81        desc->layout == UTIL_FORMAT_LAYOUT_ASTC) &&
82       /* The claim is that this should work on GM107 but it doesn't. Need to
83        * test further and figure out if it's a nouveau issue or a HW one.
84       nouveau_screen(pscreen)->class_3d < GM107_3D_CLASS &&
85        */
86       nouveau_screen(pscreen)->class_3d != NVEA_3D_CLASS)
87      return false;
88
89   /* shared is always supported */
90   bindings &= ~(PIPE_BIND_LINEAR |
91                 PIPE_BIND_SHARED);
92
93   if (bindings & PIPE_BIND_SHADER_IMAGE) {
94      if (format == PIPE_FORMAT_B8G8R8A8_UNORM &&
95          nouveau_screen(pscreen)->class_3d < NVE4_3D_CLASS) {
96         /* This should work on Fermi, but for currently unknown reasons it
97          * does not and results in breaking reads from pbos. */
98         return false;
99      }
100   }
101
102   return (( nvc0_format_table[format].usage |
103            nvc0_vertex_format[format].usage) & bindings) == bindings;
104}
105
/**
 * Answer a pipe_cap query for this screen.
 *
 * The switch is organised in groups: caps with a numeric value, caps that are
 * always reported as 1, caps that depend on the 3D class, the vram domain or
 * the prefer_nir flag, caps that are always reported as 0, and the device
 * identification caps. A cap that is not listed is logged with debug_printf()
 * and forwarded to u_pipe_screen_get_param_defaults().
 */
static int
nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
   const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
   const struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nouveau_device *dev = screen->device;

   switch (param) {
   /* non-boolean caps */
   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
      return 15;
   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
      return 12;
   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
      return 2048;
   case PIPE_CAP_MIN_TEXEL_OFFSET:
      return -8;
   case PIPE_CAP_MAX_TEXEL_OFFSET:
      return 7;
   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
      return -32;
   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
      return 31;
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
      return 128 * 1024 * 1024;
   case PIPE_CAP_GLSL_FEATURE_LEVEL:
      return 430;
   case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
      return 430;
   case PIPE_CAP_MAX_RENDER_TARGETS:
      return 8;
   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
      return 1;
   case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
   case PIPE_CAP_RASTERIZER_SUBPIXEL_BITS:
      return 8;
   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
      return 4;
   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
      return 128;
   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
      return 1024;
   case PIPE_CAP_MAX_VERTEX_STREAMS:
      return 4;
   case PIPE_CAP_MAX_GS_INVOCATIONS:
      return 32;
   case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
      return 1 << 27;
   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
      return 2048;
   case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:
      return 2047;
   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
      return 256;
   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
      if (class_3d < GM107_3D_CLASS)
         return 256; /* IMAGE bindings require alignment to 256 */
      return 16;
   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
      return 16;
   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
      return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
   case PIPE_CAP_MAX_VIEWPORTS:
      return NVC0_MAX_VIEWPORTS;
   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
      return 4;
   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
      return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
   case PIPE_CAP_ENDIANNESS:
      return PIPE_ENDIAN_LITTLE;
   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
      return 30;
   case PIPE_CAP_MAX_WINDOW_RECTANGLES:
      return NVC0_MAX_WINDOW_RECTANGLES;
   case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS:
      return class_3d >= GM200_3D_CLASS ? 8 : 0;
   case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
      return 64 * 1024 * 1024;
   case PIPE_CAP_MAX_VARYINGS:
      /* NOTE: These only count our slots for GENERIC varyings.
       * The address space may be larger, but the actual hard limit seems to be
       * less than what the address space layout permits, so don't add TEXCOORD,
       * COLOR, etc. here.
       */
      return 0x1f0 / 16;

   /* supported caps */
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
   case PIPE_CAP_TEXTURE_SWIZZLE:
   case PIPE_CAP_NPOT_TEXTURES:
   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
   case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
   case PIPE_CAP_ANISOTROPIC_FILTER:
   case PIPE_CAP_SEAMLESS_CUBE_MAP:
   case PIPE_CAP_CUBE_MAP_ARRAY:
   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
   case PIPE_CAP_TEXTURE_MULTISAMPLE:
   case PIPE_CAP_DEPTH_CLIP_DISABLE:
   case PIPE_CAP_POINT_SPRITE:
   case PIPE_CAP_TGSI_TEXCOORD:
   case PIPE_CAP_SM3:
   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
   case PIPE_CAP_QUERY_TIMESTAMP:
   case PIPE_CAP_QUERY_TIME_ELAPSED:
   case PIPE_CAP_OCCLUSION_QUERY:
   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
   case PIPE_CAP_INDEP_BLEND_ENABLE:
   case PIPE_CAP_INDEP_BLEND_FUNC:
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
   case PIPE_CAP_PRIMITIVE_RESTART:
   case PIPE_CAP_TGSI_INSTANCEID:
   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
   case PIPE_CAP_CONDITIONAL_RENDER:
   case PIPE_CAP_TEXTURE_BARRIER:
   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
   case PIPE_CAP_START_INSTANCE:
   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
   case PIPE_CAP_DRAW_INDIRECT:
   case PIPE_CAP_USER_VERTEX_BUFFERS:
   case PIPE_CAP_TEXTURE_QUERY_LOD:
   case PIPE_CAP_SAMPLE_SHADING:
   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
   case PIPE_CAP_TEXTURE_GATHER_SM5:
   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
   case PIPE_CAP_SAMPLER_VIEW_TARGET:
   case PIPE_CAP_CLIP_HALFZ:
   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
   case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
   case PIPE_CAP_DEPTH_BOUNDS_TEST:
   case PIPE_CAP_TGSI_TXQS:
   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
   case PIPE_CAP_SHAREABLE_SHADERS:
   case PIPE_CAP_CLEAR_TEXTURE:
   case PIPE_CAP_DRAW_PARAMETERS:
   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
   case PIPE_CAP_MULTI_DRAW_INDIRECT:
   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
   case PIPE_CAP_QUERY_BUFFER_OBJECT:
   case PIPE_CAP_INVALIDATE_BUFFER:
   case PIPE_CAP_STRING_MARKER:
   case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
   case PIPE_CAP_CULL_DISTANCE:
   case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
   case PIPE_CAP_TGSI_VOTE:
   case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
   case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
   case PIPE_CAP_TGSI_MUL_ZERO_WINS:
   case PIPE_CAP_DOUBLES:
   case PIPE_CAP_INT64:
   case PIPE_CAP_TGSI_TEX_TXF_LZ:
   case PIPE_CAP_TGSI_CLOCK:
   case PIPE_CAP_COMPUTE:
   case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
   case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
   case PIPE_CAP_QUERY_SO_OVERFLOW:
   case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL:
   case PIPE_CAP_TGSI_DIV:
      return 1;
   /* caps that depend on where buffers live or on the 3D class */
   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
      return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
   case PIPE_CAP_TGSI_FS_FBFETCH:
      return class_3d >= NVE4_3D_CLASS; /* needs testing on fermi */
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
   case PIPE_CAP_TGSI_BALLOT:
      return class_3d >= NVE4_3D_CLASS;
   case PIPE_CAP_BINDLESS_TEXTURE:
      return class_3d >= NVE4_3D_CLASS;
   case PIPE_CAP_TGSI_ATOMFADD:
      return class_3d < GM107_3D_CLASS; /* needs additional lowering */
   case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
   case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
   case PIPE_CAP_POST_DEPTH_COVERAGE:
   case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
   case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
   case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
   case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
      return class_3d >= GM200_3D_CLASS;
   case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:
      return class_3d >= GP100_3D_CLASS;

   /* caps that are only reported when the screen prefers NIR */
   case PIPE_CAP_INT64_DIVMOD:
      return screen->prefer_nir ? 1 : 0;

   /* unsupported caps */
   case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
   case PIPE_CAP_SHADER_STENCIL_EXPORT:
   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_FAKE_SW_MSAA:
   case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
   case PIPE_CAP_VERTEXID_NOBASE:
   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
   case PIPE_CAP_GENERATE_MIPMAP:
   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
   case PIPE_CAP_QUERY_MEMORY_INFO:
   case PIPE_CAP_PCI_GROUP:
   case PIPE_CAP_PCI_BUS:
   case PIPE_CAP_PCI_DEVICE:
   case PIPE_CAP_PCI_FUNCTION:
   case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
   case PIPE_CAP_NATIVE_FENCE_FD:
   case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
   case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
   case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
   case PIPE_CAP_MEMOBJ:
   case PIPE_CAP_LOAD_CONSTBUF:
   case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
   case PIPE_CAP_TILE_RASTER_ORDER:
   case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
   case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS:
   case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
   case PIPE_CAP_CONTEXT_PRIORITY_MASK:
   case PIPE_CAP_FENCE_SIGNAL:
   case PIPE_CAP_CONSTBUF0_FLAGS:
   case PIPE_CAP_PACKED_UNIFORMS:
   case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES:
   case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS:
   case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS:
   case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS:
   case PIPE_CAP_SURFACE_SAMPLE_COUNT:
   case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
   case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
   case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
   case PIPE_CAP_NIR_COMPACT_ARRAYS:
   case PIPE_CAP_IMAGE_LOAD_FORMATTED:
      return 0;

   /* device identification and memory information */
   case PIPE_CAP_VENDOR_ID:
      return 0x10de;
   case PIPE_CAP_DEVICE_ID: {
      uint64_t device_id;
      /* A failed getparam query is logged and reported to the caller as -1. */
      if (nouveau_getparam(dev, NOUVEAU_GETPARAM_PCI_DEVICE, &device_id)) {
         NOUVEAU_ERR("NOUVEAU_GETPARAM_PCI_DEVICE failed.\n");
         return -1;
      }
      /* The 64-bit value is converted to the int return type here. */
      return device_id;
   }
   case PIPE_CAP_ACCELERATED:
      return 1;
   case PIPE_CAP_VIDEO_MEMORY:
      /* NOTE(review): presumably converts bytes to MiB -- confirm the unit
       * of vram_size. */
      return dev->vram_size >> 20;
   case PIPE_CAP_UMA:
      return 0;
   default:
      /* Not handled by this driver: log it and use the shared defaults. */
      debug_printf("%s: unhandled cap %d\n", __func__, param);
      return u_pipe_screen_get_param_defaults(pscreen, param);
   }
}
380
381static int
382nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
383                             enum pipe_shader_type shader,
384                             enum pipe_shader_cap param)
385{
386   const struct nouveau_screen *screen = nouveau_screen(pscreen);
387   const uint16_t class_3d = screen->class_3d;
388
389   switch (shader) {
390   case PIPE_SHADER_VERTEX:
391   case PIPE_SHADER_GEOMETRY:
392   case PIPE_SHADER_FRAGMENT:
393   case PIPE_SHADER_COMPUTE:
394   case PIPE_SHADER_TESS_CTRL:
395   case PIPE_SHADER_TESS_EVAL:
396      break;
397   default:
398      return 0;
399   }
400
401   switch (param) {
402   case PIPE_SHADER_CAP_PREFERRED_IR:
403      return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
404   case PIPE_SHADER_CAP_SUPPORTED_IRS:
405      return 1 << PIPE_SHADER_IR_TGSI |
406             1 << PIPE_SHADER_IR_NIR;
407   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
408   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
409   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
410   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
411      return 16384;
412   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
413      return 16;
414   case PIPE_SHADER_CAP_MAX_INPUTS:
415      return 0x200 / 16;
416   case PIPE_SHADER_CAP_MAX_OUTPUTS:
417      return 32;
418   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
419      return NVC0_MAX_CONSTBUF_SIZE;
420   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
421      return NVC0_MAX_PIPE_CONSTBUFS;
422   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
423      return shader != PIPE_SHADER_FRAGMENT;
424   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
425   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
426   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
427      return 1;
428   case PIPE_SHADER_CAP_MAX_TEMPS:
429      return NVC0_CAP_MAX_PROGRAM_TEMPS;
430   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
431      return 1;
432   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
433      return 1;
434   case PIPE_SHADER_CAP_SUBROUTINES:
435      return 1;
436   case PIPE_SHADER_CAP_INTEGERS:
437      return 1;
438   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
439      return 1;
440   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
441      return 1;
442   case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
443      return 1;
444   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
445   case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
446   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
447   case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
448   case PIPE_SHADER_CAP_INT64_ATOMICS:
449   case PIPE_SHADER_CAP_FP16:
450   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
451   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
452      return 0;
453   case PIPE_SHADER_CAP_SCALAR_ISA:
454      return 1;
455   case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
456      return NVC0_MAX_BUFFERS;
457   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
458      return (class_3d >= NVE4_3D_CLASS) ? 32 : 16;
459   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
460      return (class_3d >= NVE4_3D_CLASS) ? 32 : 16;
461   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
462      return 32;
463   case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
464      if (class_3d >= NVE4_3D_CLASS)
465         return NVC0_MAX_IMAGES;
466      if (shader == PIPE_SHADER_FRAGMENT || shader == PIPE_SHADER_COMPUTE)
467         return NVC0_MAX_IMAGES;
468      return 0;
469   default:
470      NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
471      return 0;
472   }
473}
474
475static float
476nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
477{
478   const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
479
480   switch (param) {
481   case PIPE_CAPF_MAX_LINE_WIDTH:
482   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
483      return 10.0f;
484   case PIPE_CAPF_MAX_POINT_WIDTH:
485      return 63.0f;
486   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
487      return 63.375f;
488   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
489      return 16.0f;
490   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
491      return 15.0f;
492   case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
493      return 0.0f;
494   case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
495      return class_3d >= GM200_3D_CLASS ? 0.75f : 0.0f;
496   case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
497      return class_3d >= GM200_3D_CLASS ? 0.25f : 0.0f;
498   }
499
500   NOUVEAU_ERR("unknown PIPE_CAPF %d\n", param);
501   return 0.0f;
502}
503
504static int
505nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
506                              enum pipe_shader_ir ir_type,
507                              enum pipe_compute_cap param, void *data)
508{
509   struct nvc0_screen *screen = nvc0_screen(pscreen);
510   const uint16_t obj_class = screen->compute->oclass;
511
512#define RET(x) do {                  \
513   if (data)                         \
514      memcpy(data, x, sizeof(x));    \
515   return sizeof(x);                 \
516} while (0)
517
518   switch (param) {
519   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
520      RET((uint64_t []) { 3 });
521   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
522      if (obj_class >= NVE4_COMPUTE_CLASS) {
523         RET(((uint64_t []) { 0x7fffffff, 65535, 65535 }));
524      } else {
525         RET(((uint64_t []) { 65535, 65535, 65535 }));
526      }
527   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
528      RET(((uint64_t []) { 1024, 1024, 64 }));
529   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
530      RET((uint64_t []) { 1024 });
531   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
532      if (obj_class >= NVE4_COMPUTE_CLASS) {
533         RET((uint64_t []) { 1024 });
534      } else {
535         RET((uint64_t []) { 512 });
536      }
537   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
538      RET((uint64_t []) { 1ULL << 40 });
539   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
540      switch (obj_class) {
541      case GM200_COMPUTE_CLASS:
542         RET((uint64_t []) { 96 << 10 });
543         break;
544      case GM107_COMPUTE_CLASS:
545         RET((uint64_t []) { 64 << 10 });
546         break;
547      default:
548         RET((uint64_t []) { 48 << 10 });
549         break;
550      }
551   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
552      RET((uint64_t []) { 512 << 10 });
553   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
554      RET((uint64_t []) { 4096 });
555   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
556      RET((uint32_t []) { 32 });
557   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
558      RET((uint64_t []) { 1ULL << 40 });
559   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
560      RET((uint32_t []) { 0 });
561   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
562      RET((uint32_t []) { screen->mp_count_compute });
563   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
564      RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
565   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
566      RET((uint32_t []) { 64 });
567   default:
568      return 0;
569   }
570
571#undef RET
572}
573
/**
 * Report the pixel grid size used for the given sample count.
 *
 * @param pscreen       unused
 * @param sample_count  0, 1, 2, 4 or 8; anything else trips an assert and
 *                      leaves the outputs untouched
 * @param width         receives the grid width
 * @param height        receives the grid height
 */
static void
nvc0_screen_get_sample_pixel_grid(struct pipe_screen *pscreen,
                                  unsigned sample_count,
                                  unsigned *width, unsigned *height)
{
   unsigned grid_w, grid_h;

   switch (sample_count) {
   case 0:
   case 1:
      /* this could be 4x4, but the GL state tracker makes it difficult to
       * create a 1x MSAA texture and smaller grids save CB space */
   case 2:
      grid_w = 2;
      grid_h = 4;
      break;
   case 4:
      grid_w = 2;
      grid_h = 2;
      break;
   case 8:
      grid_w = 1;
      grid_h = 2;
      break;
   default:
      assert(0);
      return;
   }

   *width = grid_w;
   *height = grid_h;
}
603
/**
 * Tear down an nvc0 screen and free it.
 *
 * Nothing is released unless nouveau_drm_screen_unref() returns non-zero
 * (NOTE(review): presumably that means the last reference was dropped --
 * confirm against its definition). After that the function waits for the
 * current fence, detaches the pushbuf, destroys the blitter and the pm
 * program, drops the buffer objects and heaps, deletes the engine objects
 * and finally frees the screen itself.
 */
static void
nvc0_screen_destroy(struct pipe_screen *pscreen)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);

   if (!nouveau_drm_screen_unref(&screen->base))
      return;

   if (screen->base.fence.current) {
      struct nouveau_fence *current = NULL;

      /* nouveau_fence_wait will create a new current fence, so wait on the
       * _current_ one, and remove both.
       */
      nouveau_fence_ref(screen->base.fence.current, &current);
      nouveau_fence_wait(current, NULL);
      nouveau_fence_ref(NULL, &current);
      nouveau_fence_ref(NULL, &screen->base.fence.current);
   }
   /* Clear the pushbuf's user_priv pointer before the screen goes away. */
   if (screen->base.pushbuf)
      screen->base.pushbuf->user_priv = NULL;

   if (screen->blitter)
      nvc0_blitter_destroy(screen);
   if (screen->pm.prog) {
      screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
      nvc0_program_destroy(NULL, screen->pm.prog);
      FREE(screen->pm.prog);
   }

   /* Drop the screen's references to its buffer objects. */
   nouveau_bo_ref(NULL, &screen->text);
   nouveau_bo_ref(NULL, &screen->uniform_bo);
   nouveau_bo_ref(NULL, &screen->tls);
   nouveau_bo_ref(NULL, &screen->txc);
   nouveau_bo_ref(NULL, &screen->fence.bo);
   nouveau_bo_ref(NULL, &screen->poly_cache);

   nouveau_heap_destroy(&screen->lib_code);
   nouveau_heap_destroy(&screen->text_heap);

   FREE(screen->tic.entries);

   /* Delete the engine objects held by the screen. */
   nouveau_object_del(&screen->eng3d);
   nouveau_object_del(&screen->eng2d);
   nouveau_object_del(&screen->m2mf);
   nouveau_object_del(&screen->compute);
   nouveau_object_del(&screen->nvsw);

   nouveau_screen_fini(&screen->base);

   FREE(screen);
}
656
657static int
658nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
659                     unsigned size, const uint32_t *data)
660{
661   struct nouveau_pushbuf *push = screen->base.pushbuf;
662
663   size /= 4;
664
665   assert((pos + size) <= 0x800);
666
667   BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2);
668   PUSH_DATA (push, (m - 0x3800) / 8);
669   PUSH_DATA (push, pos);
670   BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1);
671   PUSH_DATA (push, pos);
672   PUSH_DATAp(push, data, size);
673
674   return pos + size;
675}
676
/**
 * Emit a fixed sequence of 3D-engine methods into the pushbuf.
 *
 * Apart from VERTEX_ID_GEN_MODE, every method is addressed by raw offset and
 * written with a hard-coded value; SOURCE gives no names or meanings for
 * them. A few writes are skipped depending on obj_class, as noted inline.
 */
static void
nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
{
   BEGIN_NVC0(push, SUBC_3D(0x10cc), 1);
   PUSH_DATA (push, 0xff);
   BEGIN_NVC0(push, SUBC_3D(0x10e0), 2);
   PUSH_DATA (push, 0xff);
   PUSH_DATA (push, 0xff);
   BEGIN_NVC0(push, SUBC_3D(0x10ec), 2);
   PUSH_DATA (push, 0xff);
   PUSH_DATA (push, 0xff);
   BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
   PUSH_DATA (push, 0x3f);

   BEGIN_NVC0(push, SUBC_3D(0x16a8), 1);
   PUSH_DATA (push, (3 << 16) | 3);
   BEGIN_NVC0(push, SUBC_3D(0x1794), 1);
   PUSH_DATA (push, (2 << 16) | 2);

   /* 0x12ac is only written on classes below GM107. */
   if (obj_class < GM107_3D_CLASS) {
      BEGIN_NVC0(push, SUBC_3D(0x12ac), 1);
      PUSH_DATA (push, 0);
   }
   BEGIN_NVC0(push, SUBC_3D(0x0218), 1);
   PUSH_DATA (push, 0x10);
   BEGIN_NVC0(push, SUBC_3D(0x10fc), 1);
   PUSH_DATA (push, 0x10);
   BEGIN_NVC0(push, SUBC_3D(0x1290), 1);
   PUSH_DATA (push, 0x10);
   BEGIN_NVC0(push, SUBC_3D(0x12d8), 2);
   PUSH_DATA (push, 0x10);
   PUSH_DATA (push, 0x10);
   BEGIN_NVC0(push, SUBC_3D(0x1140), 1);
   PUSH_DATA (push, 0x10);
   BEGIN_NVC0(push, SUBC_3D(0x1610), 1);
   PUSH_DATA (push, 0xe);

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_GEN_MODE), 1);
   PUSH_DATA (push, NVC0_3D_VERTEX_ID_GEN_MODE_DRAW_ARRAYS_ADD_START);
   BEGIN_NVC0(push, SUBC_3D(0x030c), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, SUBC_3D(0x0300), 1);
   PUSH_DATA (push, 3);

   BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
   PUSH_DATA (push, 0x3fffff);
   BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1);
   PUSH_DATA (push, 1);
   BEGIN_NVC0(push, SUBC_3D(0x19c0), 1);
   PUSH_DATA (push, 1);

   /* 0x075c is written below GM107 only; 0x07fc additionally needs
    * obj_class >= NVE4_3D_CLASS. */
   if (obj_class < GM107_3D_CLASS) {
      BEGIN_NVC0(push, SUBC_3D(0x075c), 1);
      PUSH_DATA (push, 3);

      if (obj_class >= NVE4_3D_CLASS) {
         BEGIN_NVC0(push, SUBC_3D(0x07fc), 1);
         PUSH_DATA (push, 1);
      }
   }

   /* TODO: find out what software methods 0x1528, 0x1280 and (on nve4) 0x02dc
    * are supposed to do */
}
741
742static void
743nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
744{
745   struct nvc0_screen *screen = nvc0_screen(pscreen);
746   struct nouveau_pushbuf *push = screen->base.pushbuf;
747
748   /* we need to do it after possible flush in MARK_RING */
749   *sequence = ++screen->base.fence.sequence;
750
751   assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
752   PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
753   PUSH_DATAh(push, screen->fence.bo->offset);
754   PUSH_DATA (push, screen->fence.bo->offset);
755   PUSH_DATA (push, *sequence);
756   PUSH_DATA (push, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT |
757              (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT));
758}
759
760static u32
761nvc0_screen_fence_update(struct pipe_screen *pscreen)
762{
763   struct nvc0_screen *screen = nvc0_screen(pscreen);
764   return screen->fence.map[0];
765}
766
767static int
768nvc0_screen_init_compute(struct nvc0_screen *screen)
769{
770   screen->base.base.get_compute_param = nvc0_screen_get_compute_param;
771
772   switch (screen->base.device->chipset & ~0xf) {
773   case 0xc0:
774   case 0xd0:
775      return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
776   case 0xe0:
777   case 0xf0:
778   case 0x100:
779   case 0x110:
780   case 0x120:
781   case 0x130:
782      return nve4_screen_compute_setup(screen, screen->base.pushbuf);
783   default:
784      return -1;
785   }
786}
787
788static int
789nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
790                            uint32_t lpos, uint32_t lneg, uint32_t cstack)
791{
792   struct nouveau_bo *bo = NULL;
793   int ret;
794   uint64_t size = (lpos + lneg) * 32 + cstack;
795
796   if (size >= (1 << 20)) {
797      NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
798      return -1;
799   }
800
801   size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
802   size  = align(size, 0x8000);
803   size *= screen->mp_count;
804
805   size = align(size, 1 << 17);
806
807   ret = nouveau_bo_new(screen->base.device, NV_VRAM_DOMAIN(&screen->base), 1 << 17, size,
808                        NULL, &bo);
809   if (ret)
810      return ret;
811
812   /* Make sure that the pushbuf has acquired a reference to the old tls
813    * segment, as it may have commands that will reference it.
814    */
815   if (screen->tls)
816      PUSH_REFN(screen->base.pushbuf, screen->tls,
817                NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RDWR);
818   nouveau_bo_ref(NULL, &screen->tls);
819   screen->tls = bo;
820   return 0;
821}
822
823int
824nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size)
825{
826   struct nouveau_pushbuf *push = screen->base.pushbuf;
827   struct nouveau_bo *bo;
828   int ret;
829
830   ret = nouveau_bo_new(screen->base.device, NV_VRAM_DOMAIN(&screen->base),
831                        1 << 17, size, NULL, &bo);
832   if (ret)
833      return ret;
834
835   /* Make sure that the pushbuf has acquired a reference to the old text
836    * segment, as it may have commands that will reference it.
837    */
838   if (screen->text)
839      PUSH_REFN(push, screen->text,
840                NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);
841   nouveau_bo_ref(NULL, &screen->text);
842   screen->text = bo;
843
844   nouveau_heap_destroy(&screen->lib_code);
845   nouveau_heap_destroy(&screen->text_heap);
846
847   /* XXX: getting a page fault at the end of the code buffer every few
848    *  launches, don't use the last 256 bytes to work around them - prefetch ?
849    */
850   nouveau_heap_init(&screen->text_heap, 0, size - 0x100);
851
852   /* update the code segment setup */
853   BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
854   PUSH_DATAh(push, screen->text->offset);
855   PUSH_DATA (push, screen->text->offset);
856   if (screen->compute) {
857      BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
858      PUSH_DATAh(push, screen->text->offset);
859      PUSH_DATA (push, screen->text->offset);
860   }
861
862   return 0;
863}
864
865void
866nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize,
867                       int stage, int index, int size, uint64_t addr)
868{
869   assert(stage != 5);
870
871   struct nouveau_pushbuf *push = screen->base.pushbuf;
872
873   if (screen->base.class_3d >= GM107_3D_CLASS) {
874      struct nvc0_cb_binding *binding = &screen->cb_bindings[stage][index];
875
876      // TODO: Better figure out the conditions in which this is needed
877      bool serialize = binding->addr == addr && binding->size != size;
878      if (can_serialize)
879         serialize = serialize && *can_serialize;
880      if (serialize) {
881         IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
882         if (can_serialize)
883            *can_serialize = false;
884      }
885
886      binding->addr = addr;
887      binding->size = size;
888   }
889
890   if (size >= 0) {
891      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
892      PUSH_DATA (push, size);
893      PUSH_DATAh(push, addr);
894      PUSH_DATA (push, addr);
895   }
896   IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0));
897}
898
899static const nir_shader_compiler_options nir_options = {
900   .lower_fdiv = false,
901   .lower_ffma = false,
902   .fuse_ffma = false, /* nir doesn't track mad vs fma */
903   .lower_flrp32 = true,
904   .lower_flrp64 = true,
905   .lower_fpow = false,
906   .lower_fsat = false,
907   .lower_fsqrt = false, // TODO: only before gm200
908   .lower_fmod32 = true,
909   .lower_fmod64 = true,
910   .lower_bitfield_extract = false,
911   .lower_bitfield_extract_to_shifts = false,
912   .lower_bitfield_insert = false,
913   .lower_bitfield_insert_to_shifts = false,
914   .lower_bitfield_reverse = false,
915   .lower_bit_count = false,
916   .lower_bfm = false,
917   .lower_ifind_msb = false,
918   .lower_find_lsb = false,
919   .lower_uadd_carry = true, // TODO
920   .lower_usub_borrow = true, // TODO
921   .lower_mul_high = false,
922   .lower_negate = false,
923   .lower_sub = false, // TODO
924   .lower_scmp = true, // TODO: not implemented yet
925   .lower_idiv = true,
926   .lower_isign = false, // TODO
927   .fdot_replicates = false, // TODO
928   .lower_ffloor = false, // TODO
929   .lower_ffract = true,
930   .lower_fceil = false, // TODO
931   .lower_ldexp = true,
932   .lower_pack_half_2x16 = true,
933   .lower_pack_unorm_2x16 = true,
934   .lower_pack_snorm_2x16 = true,
935   .lower_pack_unorm_4x8 = true,
936   .lower_pack_snorm_4x8 = true,
937   .lower_unpack_half_2x16 = true,
938   .lower_unpack_unorm_2x16 = true,
939   .lower_unpack_snorm_2x16 = true,
940   .lower_unpack_unorm_4x8 = true,
941   .lower_unpack_snorm_4x8 = true,
942   .lower_extract_byte = true,
943   .lower_extract_word = true,
944   .lower_all_io_to_temps = false,
945   .native_integers = true,
946   .vertex_id_zero_based = false,
947   .lower_base_vertex = false,
948   .lower_helper_invocation = false,
949   .lower_cs_local_index_from_id = true,
950   .lower_cs_local_id_from_index = false,
951   .lower_device_index_to_zero = false, // TODO
952   .lower_wpos_pntc = false, // TODO
953   .lower_hadd = true, // TODO
954   .lower_add_sat = true, // TODO
955   .use_interpolated_input_intrinsics = true,
956   .lower_mul_2x32_64 = true, // TODO
957   .max_unroll_iterations = 32,
958   .lower_int64_options = nir_lower_divmod64, // TODO
959   .lower_doubles_options = 0, // TODO
960};
961
962static const void *
963nvc0_screen_get_compiler_options(struct pipe_screen *pscreen,
964                                 enum pipe_shader_ir ir,
965                                 enum pipe_shader_type shader)
966{
967   if (ir == PIPE_SHADER_IR_NIR)
968      return &nir_options;
969   return NULL;
970}
971
/*
 * Report an initialisation failure through NOUVEAU_ERR(str, err) and jump
 * to the `fail` label of the enclosing function.  `str` is passed through
 * unmodified as the format string, `err` as its single argument.
 */
#define FAIL_SCREEN_INIT(str, err) \
   do {                            \
      NOUVEAU_ERR(str, err);       \
      goto fail;                   \
   } while (0)
977
978struct nouveau_screen *
979nvc0_screen_create(struct nouveau_device *dev)
980{
981   struct nvc0_screen *screen;
982   struct pipe_screen *pscreen;
983   struct nouveau_object *chan;
984   struct nouveau_pushbuf *push;
985   uint64_t value;
986   uint32_t obj_class;
987   uint32_t flags;
988   int ret;
989   unsigned i;
990
991   switch (dev->chipset & ~0xf) {
992   case 0xc0:
993   case 0xd0:
994   case 0xe0:
995   case 0xf0:
996   case 0x100:
997   case 0x110:
998   case 0x120:
999   case 0x130:
1000      break;
1001   default:
1002      return NULL;
1003   }
1004
1005   screen = CALLOC_STRUCT(nvc0_screen);
1006   if (!screen)
1007      return NULL;
1008   pscreen = &screen->base.base;
1009   pscreen->destroy = nvc0_screen_destroy;
1010
1011   ret = nouveau_screen_init(&screen->base, dev);
1012   if (ret)
1013      FAIL_SCREEN_INIT("Base screen init failed: %d\n", ret);
1014   chan = screen->base.channel;
1015   push = screen->base.pushbuf;
1016   push->user_priv = screen;
1017   push->rsvd_kick = 5;
1018
1019   /* TODO: could this be higher on Kepler+? how does reclocking vs no
1020    * reclocking affect performance?
1021    * TODO: could this be higher on Fermi?
1022    */
1023   if (dev->chipset >= 0xe0)
1024      screen->base.transfer_pushbuf_threshold = 1024;
1025
1026   screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
1027      PIPE_BIND_SHADER_BUFFER |
1028      PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
1029      PIPE_BIND_COMMAND_ARGS_BUFFER | PIPE_BIND_QUERY_BUFFER;
1030   screen->base.sysmem_bindings |=
1031      PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
1032
1033   if (screen->base.vram_domain & NOUVEAU_BO_GART) {
1034      screen->base.sysmem_bindings |= screen->base.vidmem_bindings;
1035      screen->base.vidmem_bindings = 0;
1036   }
1037
1038   pscreen->context_create = nvc0_create;
1039   pscreen->is_format_supported = nvc0_screen_is_format_supported;
1040   pscreen->get_param = nvc0_screen_get_param;
1041   pscreen->get_shader_param = nvc0_screen_get_shader_param;
1042   pscreen->get_paramf = nvc0_screen_get_paramf;
1043   pscreen->get_sample_pixel_grid = nvc0_screen_get_sample_pixel_grid;
1044   pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info;
1045   pscreen->get_driver_query_group_info = nvc0_screen_get_driver_query_group_info;
1046   /* nir stuff */
1047   pscreen->get_compiler_options = nvc0_screen_get_compiler_options;
1048
1049   nvc0_screen_init_resource_functions(pscreen);
1050
1051   screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
1052   screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
1053
1054   flags = NOUVEAU_BO_GART | NOUVEAU_BO_MAP;
1055   if (screen->base.drm->version >= 0x01000202)
1056      flags |= NOUVEAU_BO_COHERENT;
1057
1058   ret = nouveau_bo_new(dev, flags, 0, 4096, NULL, &screen->fence.bo);
1059   if (ret)
1060      FAIL_SCREEN_INIT("Error allocating fence BO: %d\n", ret);
1061   nouveau_bo_map(screen->fence.bo, 0, NULL);
1062   screen->fence.map = screen->fence.bo->map;
1063   screen->base.fence.emit = nvc0_screen_fence_emit;
1064   screen->base.fence.update = nvc0_screen_fence_update;
1065
1066
1067   ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
1068                            NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
1069   if (ret)
1070      FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
1071
1072   BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
1073   PUSH_DATA (push, screen->nvsw->handle);
1074
1075   switch (dev->chipset & ~0xf) {
1076   case 0x130:
1077   case 0x120:
1078   case 0x110:
1079   case 0x100:
1080   case 0xf0:
1081      obj_class = NVF0_P2MF_CLASS;
1082      break;
1083   case 0xe0:
1084      obj_class = NVE4_P2MF_CLASS;
1085      break;
1086   default:
1087      obj_class = NVC0_M2MF_CLASS;
1088      break;
1089   }
1090   ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0,
1091                            &screen->m2mf);
1092   if (ret)
1093      FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
1094
1095   BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
1096   PUSH_DATA (push, screen->m2mf->oclass);
1097   if (screen->m2mf->oclass == NVE4_P2MF_CLASS) {
1098      BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1);
1099      PUSH_DATA (push, 0xa0b5);
1100   }
1101
1102   ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0,
1103                            &screen->eng2d);
1104   if (ret)
1105      FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret);
1106
1107   BEGIN_NVC0(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
1108   PUSH_DATA (push, screen->eng2d->oclass);
1109   BEGIN_NVC0(push, SUBC_2D(NVC0_2D_SINGLE_GPC), 1);
1110   PUSH_DATA (push, 0);
1111   BEGIN_NVC0(push, NVC0_2D(OPERATION), 1);
1112   PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
1113   BEGIN_NVC0(push, NVC0_2D(CLIP_ENABLE), 1);
1114   PUSH_DATA (push, 0);
1115   BEGIN_NVC0(push, NVC0_2D(COLOR_KEY_ENABLE), 1);
1116   PUSH_DATA (push, 0);
1117   BEGIN_NVC0(push, SUBC_2D(0x0884), 1);
1118   PUSH_DATA (push, 0x3f);
1119   BEGIN_NVC0(push, SUBC_2D(0x0888), 1);
1120   PUSH_DATA (push, 1);
1121   BEGIN_NVC0(push, NVC0_2D(COND_MODE), 1);
1122   PUSH_DATA (push, NV50_2D_COND_MODE_ALWAYS);
1123
1124   BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2);
1125   PUSH_DATAh(push, screen->fence.bo->offset + 16);
1126   PUSH_DATA (push, screen->fence.bo->offset + 16);
1127
1128   switch (dev->chipset & ~0xf) {
1129   case 0x130:
1130      switch (dev->chipset) {
1131      case 0x130:
1132      case 0x13b:
1133         obj_class = GP100_3D_CLASS;
1134         break;
1135      default:
1136         obj_class = GP102_3D_CLASS;
1137         break;
1138      }
1139      break;
1140   case 0x120:
1141      obj_class = GM200_3D_CLASS;
1142      break;
1143   case 0x110:
1144      obj_class = GM107_3D_CLASS;
1145      break;
1146   case 0x100:
1147   case 0xf0:
1148      obj_class = NVF0_3D_CLASS;
1149      break;
1150   case 0xe0:
1151      switch (dev->chipset) {
1152      case 0xea:
1153         obj_class = NVEA_3D_CLASS;
1154         break;
1155      default:
1156         obj_class = NVE4_3D_CLASS;
1157         break;
1158      }
1159      break;
1160   case 0xd0:
1161      obj_class = NVC8_3D_CLASS;
1162      break;
1163   case 0xc0:
1164   default:
1165      switch (dev->chipset) {
1166      case 0xc8:
1167         obj_class = NVC8_3D_CLASS;
1168         break;
1169      case 0xc1:
1170         obj_class = NVC1_3D_CLASS;
1171         break;
1172      default:
1173         obj_class = NVC0_3D_CLASS;
1174         break;
1175      }
1176      break;
1177   }
1178   ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0,
1179                            &screen->eng3d);
1180   if (ret)
1181      FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
1182   screen->base.class_3d = obj_class;
1183
1184   BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
1185   PUSH_DATA (push, screen->eng3d->oclass);
1186
1187   BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1);
1188   PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS);
1189
1190   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
1191      /* kill shaders after about 1 second (at 100 MHz) */
1192      BEGIN_NVC0(push, NVC0_3D(WATCHDOG_TIMER), 1);
1193      PUSH_DATA (push, 0x17);
1194   }
1195
1196   IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE),
1197                    screen->base.drm->version >= 0x01000101);
1198   BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8);
1199   for (i = 0; i < 8; ++i)
1200      PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
1201
1202   BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
1203   PUSH_DATA (push, 1);
1204
1205   BEGIN_NVC0(push, NVC0_3D(CSAA_ENABLE), 1);
1206   PUSH_DATA (push, 0);
1207   BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 1);
1208   PUSH_DATA (push, 0);
1209   BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 1);
1210   PUSH_DATA (push, NVC0_3D_MULTISAMPLE_MODE_MS1);
1211   BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_CTRL), 1);
1212   PUSH_DATA (push, 0);
1213   BEGIN_NVC0(push, NVC0_3D(LINE_WIDTH_SEPARATE), 1);
1214   PUSH_DATA (push, 1);
1215   BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);
1216   PUSH_DATA (push, 1);
1217   BEGIN_NVC0(push, NVC0_3D(BLEND_SEPARATE_ALPHA), 1);
1218   PUSH_DATA (push, 1);
1219   BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
1220   PUSH_DATA (push, 0);
1221   BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
1222   PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH);
1223   if (screen->eng3d->oclass < NVE4_3D_CLASS) {
1224      IMMED_NVC0(push, NVC0_3D(TEX_MISC), 0);
1225   } else {
1226      BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1);
1227      PUSH_DATA (push, 15);
1228   }
1229   BEGIN_NVC0(push, NVC0_3D(CALL_LIMIT_LOG), 1);
1230   PUSH_DATA (push, 8); /* 128 */
1231   BEGIN_NVC0(push, NVC0_3D(ZCULL_STATCTRS_ENABLE), 1);
1232   PUSH_DATA (push, 1);
1233   if (screen->eng3d->oclass >= NVC1_3D_CLASS) {
1234      BEGIN_NVC0(push, NVC0_3D(CACHE_SPLIT), 1);
1235      PUSH_DATA (push, NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1);
1236   }
1237
1238   nvc0_magic_3d_init(push, screen->eng3d->oclass);
1239
1240   ret = nvc0_screen_resize_text_area(screen, 1 << 19);
1241   if (ret)
1242      FAIL_SCREEN_INIT("Error allocating TEXT area: %d\n", ret);
1243
1244   /* 6 user uniform areas, 6 driver areas, and 1 for the runout */
1245   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 13 << 16, NULL,
1246                        &screen->uniform_bo);
1247   if (ret)
1248      FAIL_SCREEN_INIT("Error allocating uniform BO: %d\n", ret);
1249
1250   PUSH_REFN (push, screen->uniform_bo, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_WR);
1251
1252   /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
1253   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1254   PUSH_DATA (push, 256);
1255   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
1256   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
1257   BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
1258   PUSH_DATA (push, 0);
1259   PUSH_DATAf(push, 0.0f);
1260   PUSH_DATAf(push, 0.0f);
1261   PUSH_DATAf(push, 0.0f);
1262   PUSH_DATAf(push, 0.0f);
1263   BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
1264   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
1265   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
1266
1267   if (screen->base.drm->version >= 0x01000101) {
1268      ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
1269      if (ret)
1270         FAIL_SCREEN_INIT("NOUVEAU_GETPARAM_GRAPH_UNITS failed: %d\n", ret);
1271   } else {
1272      if (dev->chipset >= 0xe0 && dev->chipset < 0xf0)
1273         value = (8 << 8) | 4;
1274      else
1275         value = (16 << 8) | 4;
1276   }
1277   screen->gpc_count = value & 0x000000ff;
1278   screen->mp_count = value >> 8;
1279   screen->mp_count_compute = screen->mp_count;
1280
1281   ret = nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200);
1282   if (ret)
1283      FAIL_SCREEN_INIT("Error allocating TLS area: %d\n", ret);
1284
1285   BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4);
1286   PUSH_DATAh(push, screen->tls->offset);
1287   PUSH_DATA (push, screen->tls->offset);
1288   PUSH_DATA (push, screen->tls->size >> 32);
1289   PUSH_DATA (push, screen->tls->size);
1290   BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
1291   PUSH_DATA (push, 0);
1292   /* Reduce likelihood of collision with real buffers by placing the hole at
1293    * the top of the 4G area. This will have to be dealt with for real
1294    * eventually by blocking off that area from the VM.
1295    */
1296   BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
1297   PUSH_DATA (push, 0xff << 24);
1298
1299   if (screen->eng3d->oclass < GM107_3D_CLASS) {
1300      ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
1301                           &screen->poly_cache);
1302      if (ret)
1303         FAIL_SCREEN_INIT("Error allocating poly cache BO: %d\n", ret);
1304
1305      BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
1306      PUSH_DATAh(push, screen->poly_cache->offset);
1307      PUSH_DATA (push, screen->poly_cache->offset);
1308      PUSH_DATA (push, 3);
1309   }
1310
1311   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 17, NULL,
1312                        &screen->txc);
1313   if (ret)
1314      FAIL_SCREEN_INIT("Error allocating txc BO: %d\n", ret);
1315
1316   BEGIN_NVC0(push, NVC0_3D(TIC_ADDRESS_HIGH), 3);
1317   PUSH_DATAh(push, screen->txc->offset);
1318   PUSH_DATA (push, screen->txc->offset);
1319   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
1320   if (screen->eng3d->oclass >= GM107_3D_CLASS) {
1321      screen->tic.maxwell = true;
1322      if (screen->eng3d->oclass == GM107_3D_CLASS) {
1323         screen->tic.maxwell =
1324            debug_get_bool_option("NOUVEAU_MAXWELL_TIC", true);
1325         IMMED_NVC0(push, SUBC_3D(0x0f10), screen->tic.maxwell);
1326      }
1327   }
1328
1329   BEGIN_NVC0(push, NVC0_3D(TSC_ADDRESS_HIGH), 3);
1330   PUSH_DATAh(push, screen->txc->offset + 65536);
1331   PUSH_DATA (push, screen->txc->offset + 65536);
1332   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
1333
1334   BEGIN_NVC0(push, NVC0_3D(SCREEN_Y_CONTROL), 1);
1335   PUSH_DATA (push, 0);
1336   BEGIN_NVC0(push, NVC0_3D(WINDOW_OFFSET_X), 2);
1337   PUSH_DATA (push, 0);
1338   PUSH_DATA (push, 0);
1339   BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1); /* deactivate ZCULL */
1340   PUSH_DATA (push, 0x3f);
1341
1342   BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), 1);
1343   PUSH_DATA (push, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY);
1344   BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
1345   for (i = 0; i < 8 * 2; ++i)
1346      PUSH_DATA(push, 0);
1347   BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_EN), 1);
1348   PUSH_DATA (push, 0);
1349   BEGIN_NVC0(push, NVC0_3D(CLIPID_ENABLE), 1);
1350   PUSH_DATA (push, 0);
1351
1352   /* neither scissors, viewport nor stencil mask should affect clears */
1353   BEGIN_NVC0(push, NVC0_3D(CLEAR_FLAGS), 1);
1354   PUSH_DATA (push, 0);
1355
1356   BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
1357   PUSH_DATA (push, 1);
1358   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
1359      BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
1360      PUSH_DATAf(push, 0.0f);
1361      PUSH_DATAf(push, 1.0f);
1362   }
1363   BEGIN_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 1);
1364   PUSH_DATA (push, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1);
1365
1366   /* We use scissors instead of exact view volume clipping,
1367    * so they're always enabled.
1368    */
1369   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
1370      BEGIN_NVC0(push, NVC0_3D(SCISSOR_ENABLE(i)), 3);
1371      PUSH_DATA (push, 1);
1372      PUSH_DATA (push, 16384 << 16);
1373      PUSH_DATA (push, 16384 << 16);
1374   }
1375
1376#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
1377
1378   i = 0;
1379   MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
1380   MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
1381   MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
1382   MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
1383   MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
1384   MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
1385   MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
1386   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
1387   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
1388   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
1389   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
1390   MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
1391   MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
1392   MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
1393   MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
1394   MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
1395
1396   BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
1397   PUSH_DATA (push, 1);
1398   BEGIN_NVC0(push, NVC0_3D(RT_SEPARATE_FRAG_DATA), 1);
1399   PUSH_DATA (push, 1);
1400   BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
1401   PUSH_DATA (push, 0x40);
1402   BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
1403   PUSH_DATA (push, 0);
1404   BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
1405   PUSH_DATA (push, 0x30);
1406   BEGIN_NVC0(push, NVC0_3D(PATCH_VERTICES), 1);
1407   PUSH_DATA (push, 3);
1408   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
1409   PUSH_DATA (push, 0x20);
1410   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(0)), 1);
1411   PUSH_DATA (push, 0x00);
1412   screen->save_state.patch_vertices = 3;
1413
1414   BEGIN_NVC0(push, NVC0_3D(POINT_COORD_REPLACE), 1);
1415   PUSH_DATA (push, 0);
1416   BEGIN_NVC0(push, NVC0_3D(POINT_RASTER_RULES), 1);
1417   PUSH_DATA (push, NVC0_3D_POINT_RASTER_RULES_OGL);
1418
1419   IMMED_NVC0(push, NVC0_3D(EDGEFLAG), 1);
1420
1421   if (nvc0_screen_init_compute(screen))
1422      goto fail;
1423
1424   /* XXX: Compute and 3D are somehow aliased on Fermi. */
1425   for (i = 0; i < 5; ++i) {
1426      unsigned j = 0;
1427      for (j = 0; j < 16; j++)
1428         screen->cb_bindings[i][j].size = -1;
1429
1430      /* TIC and TSC entries for each unit (nve4+ only) */
1431      /* auxiliary constants (6 user clip planes, base instance id) */
1432      nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
1433                             screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
1434      if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
1435         unsigned j;
1436         BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
1437         PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO);
1438         for (j = 0; j < 8; ++j)
1439            PUSH_DATA(push, j);
1440      } else {
1441         BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
1442         PUSH_DATA (push, 0x54);
1443      }
1444
1445      /* MS sample coordinate offsets: these do not work with _ALT modes ! */
1446      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * 8);
1447      PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
1448      PUSH_DATA (push, 0); /* 0 */
1449      PUSH_DATA (push, 0);
1450      PUSH_DATA (push, 1); /* 1 */
1451      PUSH_DATA (push, 0);
1452      PUSH_DATA (push, 0); /* 2 */
1453      PUSH_DATA (push, 1);
1454      PUSH_DATA (push, 1); /* 3 */
1455      PUSH_DATA (push, 1);
1456      PUSH_DATA (push, 2); /* 4 */
1457      PUSH_DATA (push, 0);
1458      PUSH_DATA (push, 3); /* 5 */
1459      PUSH_DATA (push, 0);
1460      PUSH_DATA (push, 2); /* 6 */
1461      PUSH_DATA (push, 1);
1462      PUSH_DATA (push, 3); /* 7 */
1463      PUSH_DATA (push, 1);
1464   }
1465   BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
1466   PUSH_DATA (push, 0);
1467
1468   PUSH_KICK (push);
1469
1470   screen->tic.entries = CALLOC(
1471         NVC0_TIC_MAX_ENTRIES + NVC0_TSC_MAX_ENTRIES + NVE4_IMG_MAX_HANDLES,
1472         sizeof(void *));
1473   screen->tsc.entries = screen->tic.entries + NVC0_TIC_MAX_ENTRIES;
1474   screen->img.entries = (void *)(screen->tsc.entries + NVC0_TSC_MAX_ENTRIES);
1475
1476   if (!nvc0_blitter_create(screen))
1477      goto fail;
1478
1479   nouveau_fence_new(&screen->base, &screen->base.fence.current);
1480
1481   return &screen->base;
1482
1483fail:
1484   screen->base.base.context_create = NULL;
1485   return &screen->base;
1486}
1487
1488int
1489nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry)
1490{
1491   int i = screen->tic.next;
1492
1493   while (screen->tic.lock[i / 32] & (1 << (i % 32)))
1494      i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
1495
1496   screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
1497
1498   if (screen->tic.entries[i])
1499      nv50_tic_entry(screen->tic.entries[i])->id = -1;
1500
1501   screen->tic.entries[i] = entry;
1502   return i;
1503}
1504
1505int
1506nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry)
1507{
1508   int i = screen->tsc.next;
1509
1510   while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
1511      i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
1512
1513   screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
1514
1515   if (screen->tsc.entries[i])
1516      nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
1517
1518   screen->tsc.entries[i] = entry;
1519   return i;
1520}
1521