1/*
2 * Copyright © 2014 Broadcom
3 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25#include "util/os_misc.h"
26#include "pipe/p_defines.h"
27#include "pipe/p_screen.h"
28#include "pipe/p_state.h"
29
30#include "util/u_cpu_detect.h"
31#include "util/u_debug.h"
32#include "util/u_memory.h"
33#include "util/u_format.h"
34#include "util/u_hash_table.h"
35#include "util/u_screen.h"
36#include "util/u_transfer_helper.h"
37#include "util/ralloc.h"
38
39#include <xf86drm.h>
40#include "drm-uapi/drm_fourcc.h"
41#include "drm-uapi/vc4_drm.h"
42#include "vc4_screen.h"
43#include "vc4_context.h"
44#include "vc4_resource.h"
45
/* Table of debug flag names, each paired with the VC4_DEBUG_* bit it selects
 * and a one-line description.  The list ends with an entry whose name is
 * NULL.
 */
static const struct debug_named_value debug_options[] = {
        { "cl",       VC4_DEBUG_CL,
          "Dump command list during creation" },
        { "surf",       VC4_DEBUG_SURFACE,
          "Dump surface layouts" },
        { "qpu",      VC4_DEBUG_QPU,
          "Dump generated QPU instructions" },
        { "qir",      VC4_DEBUG_QIR,
          "Dump QPU IR during program compile" },
        { "nir",      VC4_DEBUG_NIR,
          "Dump NIR during program compile" },
        { "tgsi",     VC4_DEBUG_TGSI,
          "Dump TGSI during program compile" },
        { "shaderdb", VC4_DEBUG_SHADERDB,
          "Dump program compile information for shader-db analysis" },
        { "perf",     VC4_DEBUG_PERF,
          "Print during performance-related events" },
        { "norast",   VC4_DEBUG_NORAST,
          "Skip actual hardware execution of commands" },
        { "always_flush", VC4_DEBUG_ALWAYS_FLUSH,
          "Flush after each draw call" },
        { "always_sync", VC4_DEBUG_ALWAYS_SYNC,
          "Wait for finish after each flush" },
#ifdef USE_VC4_SIMULATOR
        /* Only offered when the driver is built with USE_VC4_SIMULATOR. */
        { "dump", VC4_DEBUG_DUMP,
          "Write a GPU command stream trace file" },
#endif
        { NULL }
};

/* Ties the "VC4_DEBUG" option to debug_options with a default of 0.
 * Presumably this is what provides debug_get_option_vc4_debug(), which
 * vc4_screen_create() calls -- confirm against util/u_debug.h.
 */
DEBUG_GET_ONCE_FLAGS_OPTION(vc4_debug, "VC4_DEBUG", debug_options, 0)
/* Currently active VC4_DEBUG_* bits; assigned in vc4_screen_create(). */
uint32_t vc4_debug;
78
79static const char *
80vc4_screen_get_name(struct pipe_screen *pscreen)
81{
82        struct vc4_screen *screen = vc4_screen(pscreen);
83
84        if (!screen->name) {
85                screen->name = ralloc_asprintf(screen,
86                                               "VC4 V3D %d.%d",
87                                               screen->v3d_ver / 10,
88                                               screen->v3d_ver % 10);
89        }
90
91        return screen->name;
92}
93
/**
 * Returns the vendor string.  The screen argument is not consulted; the
 * same constant string is returned for every screen.
 */
static const char *
vc4_screen_get_vendor(struct pipe_screen *pscreen)
{
        static const char vendor[] = "Broadcom";

        (void) pscreen;
        return vendor;
}
99
/**
 * pipe_screen::destroy hook (installed by vc4_screen_create()).
 *
 * Releases the resources hanging off the screen, closes screen->fd and
 * finally frees the screen allocation itself, so neither the screen nor its
 * file descriptor may be used after this returns.
 */
static void
vc4_screen_destroy(struct pipe_screen *pscreen)
{
        struct vc4_screen *screen = vc4_screen(pscreen);

        util_hash_table_destroy(screen->bo_handles);
        vc4_bufmgr_destroy(pscreen);
        slab_destroy_parent(&screen->transfer_pool);
        /* screen->ro is the copy made with renderonly_dup() at creation
         * time; it is NULL when no renderonly object was supplied.
         */
        free(screen->ro);

#ifdef USE_VC4_SIMULATOR
        vc4_simulator_destroy(screen);
#endif

        u_transfer_helper_destroy(pscreen->transfer_helper);

        close(screen->fd);
        /* Must come last: everything above still reads from the screen. */
        ralloc_free(pscreen);
}
119
120static bool
121vc4_has_feature(struct vc4_screen *screen, uint32_t feature)
122{
123        struct drm_vc4_get_param p = {
124                .param = feature,
125        };
126        int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_PARAM, &p);
127
128        if (ret != 0)
129                return false;
130
131        return p.value;
132}
133
134static int
135vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
136{
137        struct vc4_screen *screen = vc4_screen(pscreen);
138
139        switch (param) {
140                /* Supported features (boolean caps). */
141        case PIPE_CAP_VERTEX_COLOR_CLAMPED:
142        case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
143        case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
144        case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
145        case PIPE_CAP_NPOT_TEXTURES:
146        case PIPE_CAP_SHAREABLE_SHADERS:
147        case PIPE_CAP_BLEND_EQUATION_SEPARATE:
148        case PIPE_CAP_TEXTURE_MULTISAMPLE:
149        case PIPE_CAP_TEXTURE_SWIZZLE:
150        case PIPE_CAP_TEXTURE_BARRIER:
151                return 1;
152
153        case PIPE_CAP_NATIVE_FENCE_FD:
154                return screen->has_syncobj;
155
156        case PIPE_CAP_TILE_RASTER_ORDER:
157                return vc4_has_feature(screen,
158                                       DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER);
159
160                /* lying for GL 2.0 */
161        case PIPE_CAP_OCCLUSION_QUERY:
162        case PIPE_CAP_POINT_SPRITE:
163                return 1;
164
165        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
166        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
167                return 1;
168
169        case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
170        case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
171                return 1;
172
173                /* Texturing. */
174        case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
175        case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
176                return VC4_MAX_MIP_LEVELS;
177        case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
178                /* Note: Not supported in hardware, just faking it. */
179                return 5;
180
181        case PIPE_CAP_MAX_VARYINGS:
182                return 8;
183
184        case PIPE_CAP_VENDOR_ID:
185                return 0x14E4;
186        case PIPE_CAP_ACCELERATED:
187                return 1;
188        case PIPE_CAP_VIDEO_MEMORY: {
189                uint64_t system_memory;
190
191                if (!os_get_total_physical_memory(&system_memory))
192                        return 0;
193
194                return (int)(system_memory >> 20);
195        }
196        case PIPE_CAP_UMA:
197                return 1;
198
199        default:
200                return u_pipe_screen_get_param_defaults(pscreen, param);
201        }
202}
203
204static float
205vc4_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
206{
207        switch (param) {
208        case PIPE_CAPF_MAX_LINE_WIDTH:
209        case PIPE_CAPF_MAX_LINE_WIDTH_AA:
210                return 32;
211
212        case PIPE_CAPF_MAX_POINT_WIDTH:
213        case PIPE_CAPF_MAX_POINT_WIDTH_AA:
214                return 512.0f;
215
216        case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
217                return 0.0f;
218        case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
219                return 0.0f;
220
221        case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
222        case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
223        case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
224                return 0.0f;
225        default:
226                fprintf(stderr, "unknown paramf %d\n", param);
227                return 0;
228        }
229}
230
231static int
232vc4_screen_get_shader_param(struct pipe_screen *pscreen,
233                            enum pipe_shader_type shader,
234                            enum pipe_shader_cap param)
235{
236        if (shader != PIPE_SHADER_VERTEX &&
237            shader != PIPE_SHADER_FRAGMENT) {
238                return 0;
239        }
240
241        /* this is probably not totally correct.. but it's a start: */
242        switch (param) {
243        case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
244        case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
245        case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
246        case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
247                return 16384;
248
249        case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
250                return vc4_screen(pscreen)->has_control_flow;
251
252        case PIPE_SHADER_CAP_MAX_INPUTS:
253                return 8;
254        case PIPE_SHADER_CAP_MAX_OUTPUTS:
255                return shader == PIPE_SHADER_FRAGMENT ? 1 : 8;
256        case PIPE_SHADER_CAP_MAX_TEMPS:
257                return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
258        case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
259                return 16 * 1024 * sizeof(float);
260        case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
261                return 1;
262        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
263                return 0;
264        case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
265        case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
266        case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
267                return 0;
268        case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
269                return 1;
270        case PIPE_SHADER_CAP_SUBROUTINES:
271                return 0;
272        case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
273                return 0;
274        case PIPE_SHADER_CAP_INTEGERS:
275                return 1;
276        case PIPE_SHADER_CAP_INT64_ATOMICS:
277        case PIPE_SHADER_CAP_FP16:
278        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
279        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
280        case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
281        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
282        case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
283                return 0;
284        case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
285        case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
286                return VC4_MAX_TEXTURE_SAMPLERS;
287        case PIPE_SHADER_CAP_PREFERRED_IR:
288                return PIPE_SHADER_IR_NIR;
289        case PIPE_SHADER_CAP_SUPPORTED_IRS:
290                return 0;
291        case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
292                return 32;
293        case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
294        case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
295        case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
296        case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
297        case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
298        case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
299                return 0;
300        case PIPE_SHADER_CAP_SCALAR_ISA:
301                return 1;
302        default:
303                fprintf(stderr, "unknown shader param %d\n", param);
304                return 0;
305        }
306        return 0;
307}
308
309static boolean
310vc4_screen_is_format_supported(struct pipe_screen *pscreen,
311                               enum pipe_format format,
312                               enum pipe_texture_target target,
313                               unsigned sample_count,
314                               unsigned storage_sample_count,
315                               unsigned usage)
316{
317        struct vc4_screen *screen = vc4_screen(pscreen);
318
319        if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
320                return false;
321
322        if (sample_count > 1 && sample_count != VC4_MAX_SAMPLES)
323                return FALSE;
324
325        if (target >= PIPE_MAX_TEXTURE_TYPES) {
326                return FALSE;
327        }
328
329        if (usage & PIPE_BIND_VERTEX_BUFFER) {
330                switch (format) {
331                case PIPE_FORMAT_R32G32B32A32_FLOAT:
332                case PIPE_FORMAT_R32G32B32_FLOAT:
333                case PIPE_FORMAT_R32G32_FLOAT:
334                case PIPE_FORMAT_R32_FLOAT:
335                case PIPE_FORMAT_R32G32B32A32_SNORM:
336                case PIPE_FORMAT_R32G32B32_SNORM:
337                case PIPE_FORMAT_R32G32_SNORM:
338                case PIPE_FORMAT_R32_SNORM:
339                case PIPE_FORMAT_R32G32B32A32_SSCALED:
340                case PIPE_FORMAT_R32G32B32_SSCALED:
341                case PIPE_FORMAT_R32G32_SSCALED:
342                case PIPE_FORMAT_R32_SSCALED:
343                case PIPE_FORMAT_R16G16B16A16_UNORM:
344                case PIPE_FORMAT_R16G16B16_UNORM:
345                case PIPE_FORMAT_R16G16_UNORM:
346                case PIPE_FORMAT_R16_UNORM:
347                case PIPE_FORMAT_R16G16B16A16_SNORM:
348                case PIPE_FORMAT_R16G16B16_SNORM:
349                case PIPE_FORMAT_R16G16_SNORM:
350                case PIPE_FORMAT_R16_SNORM:
351                case PIPE_FORMAT_R16G16B16A16_USCALED:
352                case PIPE_FORMAT_R16G16B16_USCALED:
353                case PIPE_FORMAT_R16G16_USCALED:
354                case PIPE_FORMAT_R16_USCALED:
355                case PIPE_FORMAT_R16G16B16A16_SSCALED:
356                case PIPE_FORMAT_R16G16B16_SSCALED:
357                case PIPE_FORMAT_R16G16_SSCALED:
358                case PIPE_FORMAT_R16_SSCALED:
359                case PIPE_FORMAT_R8G8B8A8_UNORM:
360                case PIPE_FORMAT_R8G8B8_UNORM:
361                case PIPE_FORMAT_R8G8_UNORM:
362                case PIPE_FORMAT_R8_UNORM:
363                case PIPE_FORMAT_R8G8B8A8_SNORM:
364                case PIPE_FORMAT_R8G8B8_SNORM:
365                case PIPE_FORMAT_R8G8_SNORM:
366                case PIPE_FORMAT_R8_SNORM:
367                case PIPE_FORMAT_R8G8B8A8_USCALED:
368                case PIPE_FORMAT_R8G8B8_USCALED:
369                case PIPE_FORMAT_R8G8_USCALED:
370                case PIPE_FORMAT_R8_USCALED:
371                case PIPE_FORMAT_R8G8B8A8_SSCALED:
372                case PIPE_FORMAT_R8G8B8_SSCALED:
373                case PIPE_FORMAT_R8G8_SSCALED:
374                case PIPE_FORMAT_R8_SSCALED:
375                        break;
376                default:
377                        return FALSE;
378                }
379        }
380
381        if ((usage & PIPE_BIND_RENDER_TARGET) &&
382            !vc4_rt_format_supported(format)) {
383                return FALSE;
384        }
385
386        if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
387            (!vc4_tex_format_supported(format) ||
388             (format == PIPE_FORMAT_ETC1_RGB8 && !screen->has_etc1))) {
389                return FALSE;
390        }
391
392        if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
393            format != PIPE_FORMAT_S8_UINT_Z24_UNORM &&
394            format != PIPE_FORMAT_X8Z24_UNORM) {
395                return FALSE;
396        }
397
398        if ((usage & PIPE_BIND_INDEX_BUFFER) &&
399            format != PIPE_FORMAT_I8_UINT &&
400            format != PIPE_FORMAT_I16_UINT) {
401                return FALSE;
402        }
403
404        return TRUE;
405}
406
407static void
408vc4_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen,
409                                  enum pipe_format format, int max,
410                                  uint64_t *modifiers,
411                                  unsigned int *external_only,
412                                  int *count)
413{
414        int m, i;
415        uint64_t available_modifiers[] = {
416                DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
417                DRM_FORMAT_MOD_LINEAR,
418        };
419        struct vc4_screen *screen = vc4_screen(pscreen);
420        int num_modifiers = screen->has_tiling_ioctl ? 2 : 1;
421
422        if (!modifiers) {
423                *count = num_modifiers;
424                return;
425        }
426
427        *count = MIN2(max, num_modifiers);
428        m = screen->has_tiling_ioctl ? 0 : 1;
429        /* We support both modifiers (tiled and linear) for all sampler
430         * formats, but if we don't have the DRM_VC4_GET_TILING ioctl
431         * we shouldn't advertise the tiled formats.
432         */
433        for (i = 0; i < *count; i++) {
434                modifiers[i] = available_modifiers[m++];
435                if (external_only)
436                        external_only[i] = false;
437       }
438}
439
/* Recovers the integer that was stored directly in a pointer-sized key. */
#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))

/* Hash callback for screen->bo_handles: the key's integer value is used
 * directly as the hash.
 */
static unsigned handle_hash(void *key)
{
        const unsigned handle = PTR_TO_UINT(key);

        return handle;
}

/* Compare callback for screen->bo_handles: returns 0 when both keys hold
 * the same integer value and 1 otherwise.
 */
static int handle_compare(void *key1, void *key2)
{
        const unsigned lhs = PTR_TO_UINT(key1);
        const unsigned rhs = PTR_TO_UINT(key2);

        return (lhs == rhs) ? 0 : 1;
}
451
452static bool
453vc4_get_chip_info(struct vc4_screen *screen)
454{
455        struct drm_vc4_get_param ident0 = {
456                .param = DRM_VC4_PARAM_V3D_IDENT0,
457        };
458        struct drm_vc4_get_param ident1 = {
459                .param = DRM_VC4_PARAM_V3D_IDENT1,
460        };
461        int ret;
462
463        ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_PARAM, &ident0);
464        if (ret != 0) {
465                if (errno == EINVAL) {
466                        /* Backwards compatibility with 2835 kernels which
467                         * only do V3D 2.1.
468                         */
469                        screen->v3d_ver = 21;
470                        return true;
471                } else {
472                        fprintf(stderr, "Couldn't get V3D IDENT0: %s\n",
473                                strerror(errno));
474                        return false;
475                }
476        }
477        ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_PARAM, &ident1);
478        if (ret != 0) {
479                fprintf(stderr, "Couldn't get V3D IDENT1: %s\n",
480                        strerror(errno));
481                return false;
482        }
483
484        uint32_t major = (ident0.value >> 24) & 0xff;
485        uint32_t minor = (ident1.value >> 0) & 0xf;
486        screen->v3d_ver = major * 10 + minor;
487
488        if (screen->v3d_ver != 21 && screen->v3d_ver != 26) {
489                fprintf(stderr,
490                        "V3D %d.%d not supported by this version of Mesa.\n",
491                        screen->v3d_ver / 10,
492                        screen->v3d_ver % 10);
493                return false;
494        }
495
496        return true;
497}
498
499struct pipe_screen *
500vc4_screen_create(int fd, struct renderonly *ro)
501{
502        struct vc4_screen *screen = rzalloc(NULL, struct vc4_screen);
503        uint64_t syncobj_cap = 0;
504        struct pipe_screen *pscreen;
505        int err;
506
507        pscreen = &screen->base;
508
509        pscreen->destroy = vc4_screen_destroy;
510        pscreen->get_param = vc4_screen_get_param;
511        pscreen->get_paramf = vc4_screen_get_paramf;
512        pscreen->get_shader_param = vc4_screen_get_shader_param;
513        pscreen->context_create = vc4_context_create;
514        pscreen->is_format_supported = vc4_screen_is_format_supported;
515
516        screen->fd = fd;
517        if (ro) {
518                screen->ro = renderonly_dup(ro);
519                if (!screen->ro) {
520                        fprintf(stderr, "Failed to dup renderonly object\n");
521                        ralloc_free(screen);
522                        return NULL;
523                }
524        }
525
526        list_inithead(&screen->bo_cache.time_list);
527        (void) mtx_init(&screen->bo_handles_mutex, mtx_plain);
528        screen->bo_handles = util_hash_table_create(handle_hash, handle_compare);
529
530        screen->has_control_flow =
531                vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_BRANCHES);
532        screen->has_etc1 =
533                vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_ETC1);
534        screen->has_threaded_fs =
535                vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
536        screen->has_madvise =
537                vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_MADVISE);
538        screen->has_perfmon_ioctl =
539                vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_PERFMON);
540
541        err = drmGetCap(fd, DRM_CAP_SYNCOBJ, &syncobj_cap);
542        if (err == 0 && syncobj_cap)
543                screen->has_syncobj = true;
544
545        if (!vc4_get_chip_info(screen))
546                goto fail;
547
548        util_cpu_detect();
549
550        slab_create_parent(&screen->transfer_pool, sizeof(struct vc4_transfer), 16);
551
552        vc4_fence_screen_init(screen);
553
554        vc4_debug = debug_get_option_vc4_debug();
555        if (vc4_debug & VC4_DEBUG_SHADERDB)
556                vc4_debug |= VC4_DEBUG_NORAST;
557
558#ifdef USE_VC4_SIMULATOR
559        vc4_simulator_init(screen);
560#endif
561
562        vc4_resource_screen_init(pscreen);
563
564        pscreen->get_name = vc4_screen_get_name;
565        pscreen->get_vendor = vc4_screen_get_vendor;
566        pscreen->get_device_vendor = vc4_screen_get_vendor;
567        pscreen->get_compiler_options = vc4_screen_get_compiler_options;
568        pscreen->query_dmabuf_modifiers = vc4_screen_query_dmabuf_modifiers;
569
570        if (screen->has_perfmon_ioctl) {
571                pscreen->get_driver_query_group_info = vc4_get_driver_query_group_info;
572                pscreen->get_driver_query_info = vc4_get_driver_query_info;
573        }
574
575        return pscreen;
576
577fail:
578        close(fd);
579        ralloc_free(pscreen);
580        return NULL;
581}
582